framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,bfloat16,1,0.12747999429702758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,fp8,1,0.15500639677047728
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,bfloat16,3,0.12750719785690307
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,fp8,3,0.15482399463653565
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,fp8,7,0.15493119955062867
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,bfloat16,15,0.1283743977546692
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,fp8,15,0.15521119832992553
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,bfloat16,31,0.13381919860839844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,fp8,31,0.15475200414657592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,bfloat16,63,0.1377408027648926
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,fp8,63,0.16564160585403442
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,bfloat16,127,0.1433087944984436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,fp8,127,0.17233439683914184
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,bfloat16,255,0.21009440422058107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,bfloat16,7,0.12719520330429077
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,fp8,255,0.21763200759887696
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,fp8,511,0.33966240882873533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,bfloat16,1023,0.7151296138763428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,fp8,1023,0.5371424198150635
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,bfloat16,511,0.38323359489440917
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,bfloat16,2047,1.396134376525879
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,bfloat16,1,0.0135903999209404
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,fp8,1,0.013606399297714233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,bfloat16,3,0.01353919953107834
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,fp8,3,0.013643200695514678
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,bfloat16,7,0.014019200205802917
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,fp8,7,0.014204800128936768
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,bfloat16,fp8,2047,0.9062159538269043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,bfloat16,15,0.013844799995422364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,bfloat16,31,0.01374559998512268
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,fp8,31,0.013606399297714233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,bfloat16,63,0.013633599877357483
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,fp8,63,0.013675199449062347
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,bfloat16,127,0.013681599497795105
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,fp8,127,0.013644799590110779
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,bfloat16,255,0.015535999834537507
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,fp8,255,0.015646399557590486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,bfloat16,511,0.01851679980754852
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,fp8,511,0.019262400269508363
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,bfloat16,1023,0.02256480008363724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,fp8,1023,0.021695999801158904
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,bfloat16,2047,0.03976959884166718
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,fp8,2047,0.02876800000667572
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,bfloat16,1,0.013488000631332398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,fp8,1,0.013486400246620178
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,bfloat16,3,0.01343040019273758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,fp8,3,0.0135343998670578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,bfloat16,7,0.013500800728797913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,fp8,7,0.01353600025177002
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,bfloat16,15,0.0135343998670578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,fp8,15,0.013568000495433807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,bfloat16,31,0.013471999764442444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,fp8,31,0.013488000631332398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,bfloat16,63,0.013331200182437896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,fp8,63,0.013307200372219085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,bfloat16,127,0.013447999954223633
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,fp8,127,0.0135343998670578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,bfloat16,255,0.01539359986782074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,fp8,255,0.015484799444675446
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,bfloat16,511,0.020316800475120543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,fp8,511,0.01916159987449646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,bfloat16,1023,0.03840480148792267
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,fp8,1023,0.02715519964694977
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,bfloat16,fp8,15,0.0133200004696846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,bfloat16,2047,0.060438400506973265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,bfloat16,fp8,2047,0.04153119921684265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,bfloat16,1,0.0703440010547638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,fp8,1,0.08161439895629882
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,bfloat16,3,0.0702239990234375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,fp8,3,0.08158400058746337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,bfloat16,7,0.07062559723854064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,fp8,7,0.08169919848442078
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,bfloat16,15,0.07078400254249573
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,bfloat16,31,0.07121599912643432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,fp8,31,0.08231679797172546
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,fp8,15,0.08180959820747376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,bfloat16,63,0.07692639827728272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,bfloat16,127,0.08004800081253052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,fp8,127,0.09338560104370117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,fp8,63,0.08428320288658142
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,bfloat16,255,0.11749600172042847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,fp8,255,0.11812479496002197
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,bfloat16,511,0.20164320468902588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,fp8,511,0.17761119604110717
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,bfloat16,1023,0.3689824104309082
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,fp8,1023,0.27768640518188475
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,bfloat16,3,0.017127999663352968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,fp8,3,0.017574399709701538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,bfloat16,15,0.017260800302028655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,fp8,15,0.017528000473976135
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,bfloat16,31,0.017263999581336974
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,fp8,31,0.017574399709701538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,bfloat16,63,0.01733119934797287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,fp8,63,0.017641599476337432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,bfloat16,127,0.017504000663757326
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,fp8,127,0.01767359972000122
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,bfloat16,255,0.02404319941997528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,fp8,255,0.021775999665260316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,bfloat16,511,0.039683198928833006
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,bfloat16,1023,0.0625104010105133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,bfloat16,1,0.017543999850749968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,bfloat16,7,0.017236800491809846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,fp8,7,0.01791680008172989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,bfloat16,2047,0.10560959577560425
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,fp8,1023,0.0463919997215271
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,fp8,2047,0.06903839707374573
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,fp8,2047,0.460811185836792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,bfloat16,1,0.23957440853118897
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,bfloat16,bfloat16,2047,0.7055840015411377
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,fp8,1,0.29850239753723146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,bfloat16,7,0.23975200653076173
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,bfloat16,3,0.23947999477386475
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,fp8,511,0.029019200801849367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,fp8,3,0.2987823963165283
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,bfloat16,15,0.24633920192718506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,bfloat16,31,0.254915189743042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,fp8,7,0.2967184066772461
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,fp8,31,0.3112704038619995
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,fp8,15,0.29581599235534667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,fp8,63,0.3148751974105835
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,bfloat16,127,0.2667344093322754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,bfloat16,63,0.25697600841522217
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,fp8,127,0.32918879985809324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,bfloat16,255,0.3951407909393311
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,fp8,255,0.4214943885803223
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,bfloat16,511,0.7537424087524414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,fp8,511,0.6528607845306397
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,bfloat16,1,0.46344637870788574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,fp8,1,0.5779839992523194
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,bfloat16,3,0.46004161834716795
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,bfloat16,7,0.47420320510864256
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,fp8,1023,1.0489583969116212
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,fp8,3,0.581006383895874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,fp8,7,0.5789135932922364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,bfloat16,fp8,1,0.017819200456142426
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,bfloat16,bfloat16,1023,1.4106847763061523
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,bfloat16,15,0.48465762138366697
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,fp8,15,0.5826640129089355
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,bfloat16,63,0.4902207851409912
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,fp8,31,0.6095568180084229
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,bfloat16,31,0.4899439811706543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,fp8,63,0.6098351955413819
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,bfloat16,127,0.5432176113128662
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,fp8,127,0.6457056045532227
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,bfloat16,255,0.7677103996276855
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,bfloat16,bfloat16,1,0.9564127922058105
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,bfloat16,fp8,255,0.8270815849304199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,bfloat16,fp8,1,1.1065967559814454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,bfloat16,bfloat16,3,0.9567824363708496
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,bfloat16,bfloat16,7,0.9486911773681641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,bfloat16,fp8,3,1.1049519538879395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,bfloat16,bfloat16,15,0.9497584342956543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,bfloat16,fp8,7,1.1474335670471192
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,bfloat16,1,0.024966399371623992
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,bfloat16,fp8,15,1.169985580444336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,bfloat16,bfloat16,31,0.9509424209594727
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,fp8,1,0.027131199836730957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,bfloat16,3,0.02475679963827133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,bfloat16,7,0.025121599435806274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,fp8,3,0.026361599564552307
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,bfloat16,fp8,31,1.1754768371582032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,bfloat16,15,0.02518239915370941
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,fp8,15,0.026398399472236635
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,bfloat16,bfloat16,63,0.9696352005004882
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,bfloat16,31,0.024372799694538115
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,fp8,31,0.027246400713920593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,bfloat16,63,0.02447039932012558
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,bfloat16,127,0.027432000637054442
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,fp8,127,0.027289599180221558
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,fp8,63,0.026398399472236635
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,bfloat16,bfloat16,127,1.0045951843261718
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,fp8,255,0.035036799311637876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,bfloat16,511,0.06424959897994995
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,fp8,511,0.05227839946746826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,fp8,1023,0.07794719934463501
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,bfloat16,1023,0.12072800397872925
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,bfloat16,2047,0.1910640001296997
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,bfloat16,fp8,127,1.2613120079040527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,fp8,2047,0.12253760099411011
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,bfloat16,bfloat16,1,1.866783905029297
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,bfloat16,bfloat16,3,1.8965648651123046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,bfloat16,bfloat16,7,1.876540756225586
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,bfloat16,fp8,1,2.332558441162109
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,bfloat16,fp8,3,2.297097587585449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,fp8,7,0.026363199949264525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,bfloat16,fp8,63,1.167670440673828
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,bfloat16,bfloat16,15,1.8894800186157226
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,bfloat16,fp8,7,2.316526412963867
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,bfloat16,bfloat16,31,1.8704496383666993
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,bfloat16,fp8,15,2.3124847412109375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,bfloat16,bfloat16,63,1.8974143981933593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,bfloat16,fp8,31,2.2928815841674806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,bfloat16,fp8,63,2.2976608276367188
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,bfloat16,bfloat16,255,0.04358560144901276
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,bfloat16,bfloat16,1,3.7253646850585938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,bfloat16,bfloat16,3,3.6957183837890626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,bfloat16,fp8,1,4.617177581787109
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,bfloat16,1,0.03883680105209351
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,bfloat16,bfloat16,7,3.734588623046875
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,fp8,1,0.043721601366996765
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,bfloat16,fp8,3,4.601326370239258
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,bfloat16,3,0.03901439905166626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,bfloat16,7,0.03897440135478973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,fp8,3,0.04392800033092499
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,fp8,7,0.04387359917163849
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,bfloat16,15,0.038731199502944944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,fp8,15,0.043905600905418396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,bfloat16,31,0.038726401329040525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,bfloat16,bfloat16,15,3.7120880126953124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,bfloat16,fp8,7,4.570553588867187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,fp8,31,0.04391840100288391
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,bfloat16,63,0.04176799952983856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,fp8,127,0.046265599131584165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,bfloat16,127,0.04657599925994873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,fp8,63,0.043244799971580504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,bfloat16,255,0.07024000287055969
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,fp8,255,0.06383360028266907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,bfloat16,511,0.10995039939880372
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,fp8,511,0.09233120083808899
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,fp8,1023,0.14289599657058716
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,bfloat16,1023,0.19553760290145875
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,fp8,2047,0.23168959617614746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,bfloat16,1,0.0966816008090973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,bfloat16,bfloat16,2047,0.3608128070831299
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,fp8,1,0.11437760591506958
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,fp8,3,0.11480159759521484
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,bfloat16,3,0.09552000164985656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,fp8,7,0.11424479484558106
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,bfloat16,15,0.09680479764938354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,bfloat16,7,0.0953823983669281
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,fp8,15,0.11352159976959228
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,bfloat16,31,0.09858880043029786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,fp8,31,0.11512160301208496
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,bfloat16,127,0.10946240425109863
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,fp8,127,0.1306447982788086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,bfloat16,bfloat16,31,3.72991828918457
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,bfloat16,fp8,15,4.6801200866699215
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,bfloat16,255,0.1661247968673706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,fp8,255,0.16695359945297242
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,fp8,511,0.25694239139556885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,fp8,1023,0.4034111976623535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,bfloat16,1023,0.5430575847625733
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,bfloat16,fp8,31,4.580748748779297
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,bfloat16,1,0.012545600533485413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,bfloat16,3,0.012515200674533844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,fp8,1,0.012876799702644348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,fp8,3,0.012856000661849975
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,fp8,7,0.012811200320720672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,bfloat16,7,0.012591999769210816
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,bfloat16,15,0.012588800489902496
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,fp8,15,0.012923200428485871
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,fp8,31,0.012811200320720672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,fp8,2047,0.6765232086181641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,bfloat16,63,0.012561599910259246
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,bfloat16,127,0.012595200538635254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,fp8,63,0.012742400169372559
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,fp8,127,0.01273919939994812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,fp8,255,0.014681600034236908
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,bfloat16,255,0.014244799315929414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,bfloat16,511,0.01724800020456314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,fp8,511,0.018089599907398224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,bfloat16,1023,0.019564799964427948
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,fp8,1023,0.02289759963750839
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,bfloat16,2047,0.039094400405883786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,fp8,2047,0.030451199412345885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,bfloat16,1,0.012742400169372559
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,fp8,1,0.013169600069522858
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,bfloat16,3,0.012716799974441528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,fp8,3,0.012912000715732574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,bfloat16,7,0.012727999687194824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,fp8,7,0.012894399464130402
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,bfloat16,15,0.01265760064125061
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,fp8,15,0.01292639970779419
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,bfloat16,31,0.012724800407886505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,fp8,31,0.012923200428485871
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,bfloat16,63,0.10351519584655762
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,bfloat16,63,0.012780800461769104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,fp8,63,0.012700800597667695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,fp8,127,0.013043199479579926
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,fp8,63,0.12144479751586915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,bfloat16,255,0.014336000382900237
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,fp8,255,0.014958399534225463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,bfloat16,511,0.01815840005874634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,bfloat16,1023,0.035499200224876404
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,fp8,1023,0.03003840148448944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,bfloat16,2047,0.05270559787750244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,fp8,2047,0.04551199972629547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,bfloat16,1,0.053174400329589845
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,bfloat16,3,0.05545920133590698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,bfloat16,7,0.05549280047416687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,fp8,1,0.06268960237503052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,fp8,7,0.06409760117530823
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,fp8,15,0.06415839791297913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,bfloat16,15,0.05460799932479858
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,bfloat16,31,0.05562080144882202
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,fp8,31,0.0635424017906189
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,bfloat16,63,0.060648000240325926
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,fp8,63,0.06417279839515685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,bfloat16,127,0.06400960087776184
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,fp8,127,0.07191039919853211
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,bfloat16,255,0.09461920261383057
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,fp8,255,0.09242079854011535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,bfloat16,bfloat16,31,0.012587200105190276
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,bfloat16,511,0.15705599784851074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,bfloat16,2047,1.052348804473877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,fp8,511,0.13851200342178344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,bfloat16,1023,0.2830735921859741
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,fp8,1023,0.2219871997833252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,bfloat16,1,0.017046399414539337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,bfloat16,2047,0.5368192195892334
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,fp8,1,0.017803199589252472
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,bfloat16,3,0.016982400417327882
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,fp8,3,0.017694400250911714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,bfloat16,7,0.01698880046606064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,fp8,2047,0.35207040309906007
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,fp8,7,0.017740799486637114
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,bfloat16,15,0.016942399740219116
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,bfloat16,31,0.01696320027112961
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,fp8,15,0.017449599504470826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,fp8,31,0.0176256000995636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,bfloat16,127,0.01313920021057129
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,bfloat16,63,0.01698399931192398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,bfloat16,127,0.01720000058412552
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,fp8,63,0.017476800084114074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,bfloat16,255,0.020902399718761445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,fp8,255,0.021950399875640868
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,bfloat16,fp8,511,0.01823839992284775
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,bfloat16,511,0.03452160060405731
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,fp8,511,0.029311999678611755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,bfloat16,1023,0.05313599705696106
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,fp8,1023,0.04621599912643433
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,bfloat16,2047,0.08694559931755066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,fp8,2047,0.07232800126075745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,bfloat16,1,0.1852671980857849
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,bfloat16,fp8,3,0.06140000224113464
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,bfloat16,3,0.1846560001373291
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,bfloat16,bfloat16,511,0.30130720138549805
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,fp8,1,0.22635838985443116
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,fp8,3,0.22787039279937743
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,bfloat16,7,0.18525439500808716
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,bfloat16,15,0.18530240058898925
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,fp8,7,0.22110559940338134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,bfloat16,31,0.19650399684906006
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,bfloat16,63,0.1981343984603882
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,fp8,31,0.2314176082611084
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,fp8,15,0.22476639747619628
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,fp8,63,0.23978080749511718
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,bfloat16,127,0.2028815984725952
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,fp8,127,0.2552175998687744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,bfloat16,255,0.3073951959609985
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,fp8,255,0.32642240524291993
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,bfloat16,1,0.35470879077911377
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,bfloat16,511,0.5757472038269043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,fp8,511,0.4969679832458496
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,fp8,1,0.4451791763305664
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,bfloat16,3,0.3541807889938354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,bfloat16,7,0.35690879821777344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,fp8,1023,0.7977615833282471
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,fp8,3,0.4387551784515381
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,fp8,7,0.44589757919311523
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,bfloat16,15,0.3654128074645996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,fp8,15,0.43768959045410155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,bfloat16,63,0.3792367935180664
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,fp8,31,0.46123361587524414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,bfloat16,31,0.3751408100128174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,fp8,63,0.4650576114654541
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,bfloat16,fp8,127,0.01748639941215515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,bfloat16,255,0.5832687854766846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,bfloat16,bfloat16,1,0.7100192070007324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,fp8,255,0.6209184169769287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,bfloat16,fp8,1,0.8677472114562989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,bfloat16,bfloat16,3,0.7121295928955078
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,bfloat16,bfloat16,7,0.7173727989196778
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,bfloat16,fp8,3,0.8380847930908203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,bfloat16,bfloat16,15,0.7199215888977051
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,bfloat16,fp8,7,0.8324224472045898
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,bfloat16,fp8,15,0.9084079742431641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,bfloat16,bfloat16,31,0.7206895828247071
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,bfloat16,bfloat16,63,0.7263008117675781
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,bfloat16,fp8,31,0.8887871742248535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,bfloat16,1,0.020547200739383698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,bfloat16,fp8,63,0.886302375793457
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,fp8,1,0.02184160053730011
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,bfloat16,3,0.02053920030593872
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,bfloat16,bfloat16,1023,1.0607104301452637
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,bfloat16,7,0.020440000295639037
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,fp8,7,0.021932800114154816
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,fp8,15,0.021958400309085847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,bfloat16,31,0.02064319998025894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,bfloat16,15,0.02131039947271347
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,fp8,31,0.022023999691009523
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,bfloat16,63,0.020603199303150178
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,bfloat16,127,0.02123199999332428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,fp8,127,0.021982400119304656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,bfloat16,fp8,127,0.952064037322998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,fp8,255,0.027982398867607117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,bfloat16,127,0.38860321044921875
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,fp8,511,0.04195840060710907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,bfloat16,511,0.055959999561309814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,bfloat16,1023,0.08690879940986633
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,fp8,1023,0.06155040264129639
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,bfloat16,fp8,127,0.4942624092102051
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,bfloat16,2047,0.14761919975280763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,fp8,2047,0.09685760140419006
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,bfloat16,bfloat16,3,1.411017608642578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,bfloat16,fp8,1,1.7083023071289063
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,fp8,3,0.021648000180721282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,bfloat16,bfloat16,127,0.7659567832946778
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,fp8,63,0.02259040027856827
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,bfloat16,bfloat16,255,0.035515201091766355
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,bfloat16,bfloat16,7,1.4049375534057618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,bfloat16,fp8,3,1.7253503799438477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,bfloat16,bfloat16,15,1.4213711738586425
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,bfloat16,fp8,7,1.7417423248291015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,bfloat16,bfloat16,31,1.4234911918640136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,bfloat16,bfloat16,63,1.4193951606750488
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,bfloat16,fp8,31,1.7460399627685548
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,bfloat16,bfloat16,1,1.4142864227294922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,bfloat16,fp8,63,1.7732112884521485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,bfloat16,bfloat16,1,2.7910272598266603
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,bfloat16,bfloat16,3,2.7866687774658203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,bfloat16,bfloat16,7,2.822742462158203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,bfloat16,fp8,1,3.4934432983398436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,bfloat16,1,0.03148959875106812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,fp8,1,0.034985598921775815
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,bfloat16,fp8,15,1.8012592315673828
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,bfloat16,fp8,3,3.4769439697265625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,bfloat16,3,0.031481599807739256
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,fp8,7,0.03498240113258362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,bfloat16,7,0.030865600705146788
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,fp8,3,0.03512639999389648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,bfloat16,15,0.031435200572013856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,fp8,15,0.034729599952697754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,bfloat16,31,0.031488001346588135
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,fp8,31,0.03492479920387268
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,bfloat16,63,0.03118239939212799
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,fp8,63,0.03516159951686859
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,bfloat16,127,0.03805600106716156
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,bfloat16,255,0.057284802198410034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,fp8,255,0.05055360198020935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,bfloat16,511,0.08680319786071777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,fp8,511,0.0724560022354126
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,bfloat16,1023,0.15016800165176392
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,fp8,1023,0.11147680282592773
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,bfloat16,bfloat16,15,2.8431520462036133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,bfloat16,fp8,7,3.42401123046875
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,fp8,2047,0.18091520071029663
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,bfloat16,2047,0.2764528036117554
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,bfloat16,1,0.08310080170631409
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,bfloat16,7,0.08297600150108338
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,bfloat16,3,0.08490880131721497
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,fp8,3,0.09968159794807434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,bfloat16,bfloat16,31,2.793657684326172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,bfloat16,fp8,15,3.4646224975585938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,bfloat16,15,0.08509439826011658
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,fp8,7,0.09874079823493957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,fp8,15,0.09975200295448303
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,bfloat16,63,0.09174399971961975
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,fp8,63,0.10568959712982177
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,bfloat16,31,0.08403199911117554
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,bfloat16,127,0.09419040083885193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,fp8,31,0.09777759909629821
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,fp8,127,0.11624480485916137
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,bfloat16,255,0.14323999881744384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,bfloat16,511,0.24756159782409667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,fp8,255,0.14227839708328247
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,fp8,511,0.21851038932800293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,bfloat16,fp8,31,3.442118453979492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,bfloat16,1,0.012348800152540206
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,bfloat16,1023,0.4553520202636719
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,fp8,1023,0.3422976016998291
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,bfloat16,3,0.012337599694728852
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,fp8,1,0.012807999551296235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,fp8,7,0.01310880035161972
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,fp8,3,0.013417600095272065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,fp8,15,0.013075199723243714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,bfloat16,31,0.012729600071907043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,bfloat16,15,0.01213119998574257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,fp8,31,0.013118399679660797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,bfloat16,63,0.012761600315570831
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,fp8,63,0.012825599312782288
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,bfloat16,127,0.012417600303888322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,fp8,127,0.013495999574661254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,bfloat16,255,0.01383039951324463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,fp8,255,0.015030400454998016
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,bfloat16,511,0.017643199861049653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,fp8,511,0.018400000035762788
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,bfloat16,1023,0.0179967999458313
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,fp8,1023,0.020001600682735442
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,bfloat16,fp8,127,0.03528479933738708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,bfloat16,2047,0.8762864112854004
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,bfloat16,2047,0.02534399926662445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,bfloat16,1,0.01295360028743744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,fp8,2047,0.02301120012998581
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,bfloat16,7,0.012937599420547485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,fp8,3,0.013079999387264252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,fp8,7,0.01324480026960373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,fp8,1,0.013184000551700593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,bfloat16,15,0.012863999605178833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,fp8,15,0.013059200346469879
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,bfloat16,31,0.01233920007944107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,fp8,31,0.013179199397563934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,bfloat16,63,0.012828800082206725
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,fp8,63,0.013076800107955932
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,fp8,127,0.01363999992609024
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,bfloat16,255,0.014425599575042724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,fp8,255,0.015603199601173401
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,bfloat16,127,0.012535999715328216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,bfloat16,511,0.017539200186729432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,fp8,511,0.019276799261569978
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,bfloat16,1023,0.03119199872016907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,fp8,1023,0.028966400027275085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,bfloat16,2047,0.05228959918022156
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,fp8,2047,0.04358879923820495
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,bfloat16,1,0.04604319930076599
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,fp8,1,0.05545759797096252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,bfloat16,3,0.045798400044441225
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,fp8,1,0.09645119905471802
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,bfloat16,7,0.04774560034275055
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,fp8,15,0.05448160171508789
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,bfloat16,15,0.045719999074935916
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,bfloat16,31,0.047147199511528015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,fp8,31,0.052799999713897705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,bfloat16,63,0.05057280063629151
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,fp8,63,0.0527072012424469
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,bfloat16,127,0.05518400073051453
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,fp8,127,0.05887519717216492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,bfloat16,255,0.08317440152168273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,fp8,255,0.07852320075035095
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,bfloat16,511,0.13256479501724244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,fp8,511,0.11589280366897584
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,bfloat16,1023,0.23870880603790284
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,fp8,1023,0.17828160524368286
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,bfloat16,bfloat16,7,0.012425599992275238
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,bfloat16,1,0.01621119976043701
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,fp8,2047,0.29190878868103026
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,fp8,1,0.017504000663757326
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,bfloat16,fp8,2047,0.5672448158264161
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,bfloat16,2047,0.445468807220459
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,fp8,3,0.017531199753284453
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,bfloat16,3,0.01621920019388199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,bfloat16,15,0.01618400067090988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,fp8,15,0.017481599748134614
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,bfloat16,7,0.01650719940662384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,fp8,7,0.017718400061130523
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,bfloat16,31,0.016176000237464905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,fp8,31,0.01746399998664856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,bfloat16,127,0.016356800496578217
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,fp8,63,0.017744000256061553
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,fp8,127,0.017817600071430205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,bfloat16,255,0.02000479996204376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,fp8,255,0.021780799329280853
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,bfloat16,511,0.02985759973526001
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,fp8,511,0.02887200117111206
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,bfloat16,1023,0.05145919919013977
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,fp8,1023,0.045396798849105836
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,bfloat16,2047,0.08519200086593628
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,fp8,2047,0.07153599858283996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,bfloat16,1,0.15254559516906738
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,bfloat16,3,0.15302400588989257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,fp8,1,0.18821920156478883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,fp8,3,0.18496160507202147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,bfloat16,7,0.15308959484100343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,fp8,3,0.053200000524520875
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,fp8,7,0.1882912039756775
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,bfloat16,fp8,7,0.05314720273017883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,bfloat16,15,0.15315040349960327
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,fp8,15,0.18918559551239014
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,bfloat16,31,0.16243360042572022
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,fp8,31,0.18967519998550414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,bfloat16,63,0.1637712001800537
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,fp8,63,0.20108160972595215
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,bfloat16,127,0.1738800048828125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,fp8,127,0.21063039302825928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,bfloat16,255,0.2573024034500122
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,fp8,255,0.267742395401001
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,bfloat16,511,0.4822688102722168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,bfloat16,1,0.2927664041519165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,fp8,511,0.4148240089416504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,bfloat16,3,0.29258880615234373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,fp8,1,0.36842401027679444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,fp8,3,0.36786880493164065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,fp8,1023,0.6572879791259766
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,bfloat16,7,0.2930815935134888
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,fp8,7,0.3695440053939819
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,bfloat16,15,0.3064271926879883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,bfloat16,31,0.3100672006607056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,fp8,15,0.36531519889831543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,bfloat16,bfloat16,63,0.01658719927072525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,bfloat16,63,0.3153808116912842
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,fp8,31,0.3891727924346924
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,bfloat16,bfloat16,3,0.012932799756526947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,fp8,63,0.385148811340332
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,bfloat16,127,0.32811200618743896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,fp8,127,0.41176319122314453
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,bfloat16,255,0.49326558113098146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,bfloat16,bfloat16,1,0.588705587387085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,bfloat16,bfloat16,3,0.586678409576416
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,bfloat16,fp8,1,0.7164512157440186
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,bfloat16,fp8,3,0.7119040012359619
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,bfloat16,bfloat16,7,0.6055295944213868
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,bfloat16,bfloat16,15,0.621507215499878
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,bfloat16,fp8,7,0.7082831859588623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,bfloat16,bfloat16,31,0.6047679901123046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,bfloat16,fp8,15,0.7385104179382325
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,bfloat16,fp8,31,0.7655248165130615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,bfloat16,1,0.02006080001592636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,bfloat16,bfloat16,63,0.6246607780456543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,fp8,1,0.02197919934988022
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,bfloat16,3,0.020950399339199066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,fp8,3,0.021996800601482392
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,bfloat16,fp8,63,0.7624000072479248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,bfloat16,7,0.021028800308704375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,fp8,7,0.0220208004117012
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,bfloat16,bfloat16,127,0.6339136123657226
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,bfloat16,31,0.020129600167274476
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,bfloat16,15,0.020657600462436677
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,bfloat16,63,0.02014559954404831
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,fp8,63,0.022697600722312927
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,bfloat16,bfloat16,1023,0.8845744132995605
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,bfloat16,127,0.020744000375270844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,fp8,127,0.022700800001621245
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,fp8,255,0.027774399518966673
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,bfloat16,255,0.030740800499916076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,fp8,511,0.041606399416923526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,bfloat16,1023,0.07893279790878296
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,fp8,1023,0.0629647970199585
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,bfloat16,2047,0.13101439476013182
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,fp8,2047,0.09856160283088684
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,bfloat16,fp8,255,0.5259552001953125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,bfloat16,bfloat16,1,1.1896544456481934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,bfloat16,fp8,1,1.3906543731689454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,bfloat16,bfloat16,3,1.1871055603027343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,bfloat16,fp8,3,1.4051424026489259
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,bfloat16,bfloat16,7,1.1767328262329102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,fp8,15,0.02298080027103424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,bfloat16,fp8,127,0.8121376037597656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,bfloat16,511,0.04629279971122742
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,bfloat16,fp8,7,1.4312591552734375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,bfloat16,bfloat16,15,1.1863759994506835
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,bfloat16,bfloat16,31,1.1908096313476562
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,bfloat16,fp8,15,1.4519231796264649
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,bfloat16,fp8,31,1.4696736335754395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,bfloat16,bfloat16,63,1.1992464065551758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,bfloat16,fp8,63,1.4907535552978515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,bfloat16,bfloat16,1,2.3232303619384767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,bfloat16,bfloat16,3,2.35436954498291
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,bfloat16,fp8,31,0.021724799275398256
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,bfloat16,fp8,1,2.924777603149414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,bfloat16,bfloat16,7,2.352019119262695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,bfloat16,1,0.02725600004196167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,bfloat16,3,0.027224001288414002
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,fp8,1,0.03125280141830444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,bfloat16,fp8,3,2.8883392333984377
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,bfloat16,7,0.02802880108356476
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,fp8,3,0.03086079955101013
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,fp8,7,0.03086720108985901
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,bfloat16,bfloat16,15,2.341851234436035
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,fp8,15,0.030910399556159974
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,bfloat16,31,0.027643200755119324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,bfloat16,63,0.027432000637054442
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,fp8,31,0.03071039915084839
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,bfloat16,fp8,7,2.931233596801758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,fp8,127,0.030807998776435853
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,bfloat16,255,0.04989280104637146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,fp8,255,0.043838399648666385
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,bfloat16,511,0.07458080053329467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,fp8,511,0.06281440258026123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,bfloat16,1023,0.1283504009246826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,fp8,1023,0.09575999975204467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,fp8,2047,0.1521056056022644
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,bfloat16,2047,0.23124639987945556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,bfloat16,1,0.06778560280799865
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,bfloat16,3,0.06772159934043884
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,fp8,1,0.0791808009147644
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,bfloat16,fp8,15,2.8517791748046877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,bfloat16,bfloat16,31,2.3302431106567383
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,bfloat16,7,0.06771360039710998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,fp8,3,0.07920160293579101
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,bfloat16,15,0.0676144003868103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,fp8,15,0.07937600016593933
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,bfloat16,31,0.06841279864311219
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,bfloat16,63,0.07426080107688904
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,fp8,63,0.08174239993095397
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,fp8,31,0.07929919958114624
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,bfloat16,127,0.07731199860572815
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,fp8,127,0.09114720225334168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,bfloat16,255,0.11859999895095825
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,fp8,255,0.1155776023864746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,bfloat16,511,0.20212318897247314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,fp8,511,0.17306079864501953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,bfloat16,1023,0.36519200801849366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,fp8,1023,0.2707632064819336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,bfloat16,15,0.0279664009809494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,bfloat16,2047,0.7024816036224365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,fp8,63,0.030771198868751525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,fp8,2047,0.4559199810028076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,bfloat16,1,0.012652799487113953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,bfloat16,bfloat16,127,0.03304480016231537
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,bfloat16,3,0.012036799639463424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,bfloat16,7,0.012201599776744843
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,fp8,3,0.012929600477218629
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,bfloat16,15,0.012695999443531036
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,fp8,7,0.012600000202655792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,fp8,15,0.013148799538612366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,fp8,31,0.013128000497817992
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,bfloat16,63,0.012736000120639801
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,fp8,63,0.013006399571895599
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,bfloat16,127,0.012563200294971466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,fp8,127,0.012936000525951386
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,bfloat16,255,0.014446400105953217
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,bfloat16,4095,1.3903776168823243
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,fp8,4095,0.8541728019714355
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,fp8,255,0.01494400054216385
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,bfloat16,1023,0.01834239959716797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,bfloat16,511,0.017347200214862822
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,fp8,511,0.01930239945650101
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,fp8,1023,0.01765599995851517
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,bfloat16,2047,0.021303999423980712
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,fp8,2047,0.022123199701309205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,bfloat16,4095,0.03811039924621582
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,bfloat16,fp8,7,0.08021119832992554
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,fp8,1,0.013116799294948578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,bfloat16,1,0.013036799430847169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,bfloat16,3,0.01226079985499382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,fp8,3,0.01311040073633194
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,bfloat16,7,0.013076800107955932
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,fp8,7,0.01279360055923462
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,bfloat16,15,0.012670400738716125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,fp8,15,0.013612799346446991
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,bfloat16,31,0.012368000298738479
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,fp8,31,0.013055999577045441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,bfloat16,63,0.01316000074148178
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,fp8,63,0.012692800164222718
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,bfloat16,127,0.012643200159072877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,fp8,127,0.013583999872207642
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,bfloat16,255,0.013953599333763122
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,fp8,255,0.01509920060634613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,bfloat16,511,0.018348799645900728
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,fp8,511,0.018068799376487733
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,bfloat16,1023,0.021803200244903564
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,fp8,1023,0.021353599429130555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,bfloat16,2047,0.03751519918441772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,fp8,2047,0.027611199021339416
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,bfloat16,4095,0.05983359813690185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,bfloat16,fp8,4095,0.041131201386451724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,bfloat16,1,0.040387201309204104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,fp8,1,0.04580479860305786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,bfloat16,3,0.03862079977989197
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,fp8,3,0.0459087997674942
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,bfloat16,7,0.03950720131397247
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,fp8,7,0.04404320120811463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,bfloat16,15,0.04037120044231415
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,fp8,15,0.04521439969539642
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,bfloat16,31,0.03870719969272614
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,fp8,31,0.04595040082931519
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,bfloat16,63,0.04071680009365082
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,fp8,63,0.044121599197387694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,bfloat16,127,0.047056001424789426
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,fp8,127,0.04736000001430511
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,bfloat16,255,0.06975520253181458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,fp8,255,0.0650160014629364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,bfloat16,511,0.11030399799346924
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,fp8,511,0.09276319742202759
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,bfloat16,1023,0.19502559900283814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,fp8,1,0.012700800597667695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,fp8,1023,0.14540799856185913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,bfloat16,2047,0.35946719646453856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,fp8,2047,0.23428959846496583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,bfloat16,1,0.012278400361537933
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,fp8,1,0.013147200644016265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,bfloat16,3,0.012265600264072418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,fp8,3,0.013100799918174744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,bfloat16,7,0.01228479966521263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,fp8,7,0.013187199831008911
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,bfloat16,15,0.012294399738311767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,fp8,15,0.013279999792575835
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,bfloat16,31,0.012432000041007996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,bfloat16,4095,0.698798418045044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,fp8,31,0.013260799646377563
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,bfloat16,63,0.012488000094890594
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,bfloat16,127,0.012899200618267059
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,fp8,127,0.012812800705432892
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,fp8,63,0.012828800082206725
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,bfloat16,255,0.014713600277900696
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,fp8,255,0.014737600088119506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,bfloat16,511,0.01748799979686737
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,fp8,511,0.01879040002822876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,bfloat16,1023,0.03667519986629486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,fp8,1023,0.02561599910259247
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,fp8,4095,0.027326399087905885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,bfloat16,2047,0.058873599767684935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,bfloat16,4095,0.10284960269927979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,fp8,4095,0.06614559888839722
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,bfloat16,fp8,31,2.9201311111450194
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,bfloat16,1,0.12596160173416138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,fp8,1,0.15254559516906738
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,bfloat16,3,0.12638720273971557
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,bfloat16,7,0.12598079442977905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,fp8,7,0.15287679433822632
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,bfloat16,15,0.12724159955978392
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,fp8,15,0.1551759958267212
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,fp8,3,0.1539520025253296
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,bfloat16,31,0.13316160440444946
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,fp8,31,0.15436320304870604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,bfloat16,63,0.13549920320510864
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,fp8,63,0.1647264003753662
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,bfloat16,127,0.14132959842681886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,fp8,127,0.17426719665527343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,bfloat16,255,0.21082561016082763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,fp8,255,0.2179744005203247
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,bfloat16,511,0.3946768045425415
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,bfloat16,bfloat16,31,0.012204799801111221
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,bfloat16,1023,0.7172080039978027
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,fp8,1023,0.5427743911743164
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,bfloat16,fp8,4095,0.4390431880950928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,bfloat16,1,0.23671200275421142
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,fp8,1,0.29463040828704834
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,bfloat16,3,0.23939359188079834
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,fp8,3,0.2919071912765503
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,bfloat16,7,0.2395695924758911
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,bfloat16,fp8,2047,0.0404559999704361
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,bfloat16,15,0.24161601066589355
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,bfloat16,2047,1.3860112190246583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,fp8,7,0.296726393699646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,fp8,2047,0.9033647537231445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,fp8,15,0.29484000205993655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,bfloat16,31,0.2512495994567871
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,fp8,31,0.30496160984039306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,fp8,63,0.31379199028015137
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,bfloat16,63,0.2701456069946289
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,fp8,127,0.3327375888824463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,bfloat16,127,0.2728032112121582
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,bfloat16,255,0.3962591886520386
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,fp8,255,0.41696481704711913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,bfloat16,1,0.46339998245239256
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,fp8,1,0.5773104190826416
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,bfloat16,3,0.4603568077087402
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,bfloat16,511,0.7735424041748047
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,bfloat16,fp8,511,0.6643360137939454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,fp8,3,0.5719711780548096
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,bfloat16,fp8,511,0.3411344051361084
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,bfloat16,7,0.4722447872161865
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,bfloat16,15,0.48577442169189455
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,fp8,7,0.5765471935272217
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,bfloat16,31,0.500324821472168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,fp8,15,0.5780879974365234
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,bfloat16,63,0.4913951873779297
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,fp8,31,0.5885503768920899
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,fp8,63,0.6085311889648437
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,fp8,1,0.017049600183963776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,bfloat16,1,0.01652639955282211
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,fp8,3,0.017052799463272095
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,bfloat16,3,0.016606399416923524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,bfloat16,7,0.016337600350379945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,fp8,7,0.017047999799251555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,bfloat16,15,0.016675199568271636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,fp8,15,0.017263999581336974
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,fp8,127,0.6516223907470703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,bfloat16,31,0.01635040044784546
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,fp8,31,0.017550399899482726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,bfloat16,63,0.016359999775886536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,bfloat16,127,0.01655520051717758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,fp8,127,0.0176256000995636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,bfloat16,255,0.020715199410915375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,fp8,255,0.020974400639533996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,fp8,511,0.029110398888587952
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,bfloat16,511,0.037212800979614255
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,bfloat16,1023,0.060094398260116574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,fp8,1023,0.04470399916172028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,fp8,255,0.8203167915344238
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,fp8,2047,0.0684112012386322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,bfloat16,4095,0.1881600022315979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,fp8,4095,0.12530399560928346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,bfloat16,bfloat16,1,0.9359423637390136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,bfloat16,bfloat16,3,0.9522560119628907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,bfloat16,fp8,1,1.1036288261413574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,bfloat16,fp8,3,1.1025936126708984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,bfloat16,bfloat16,7,0.9346207618713379
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,bfloat16,fp8,7,1.1499855995178223
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,bfloat16,bfloat16,15,0.9540255546569825
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,bfloat16,fp8,15,1.1587072372436524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,bfloat16,127,0.5124256134033203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,bfloat16,bfloat16,31,0.938043212890625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,fp8,63,0.01763039976358414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,bfloat16,bfloat16,255,0.7817872047424317
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,bfloat16,bfloat16,2047,0.10340160131454468
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,bfloat16,fp8,31,1.1830703735351562
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,bfloat16,fp8,63,1.1538895606994628
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,bfloat16,bfloat16,127,1.0089216232299805
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,bfloat16,fp8,127,1.2412431716918946
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,bfloat16,bfloat16,1,1.8848175048828124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,bfloat16,bfloat16,3,1.883568000793457
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,bfloat16,bfloat16,7,1.8486272811889648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,bfloat16,fp8,1,2.39268798828125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,bfloat16,fp8,3,2.290056037902832
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,bfloat16,bfloat16,15,1.8538047790527343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,bfloat16,fp8,7,2.358363151550293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,bfloat16,1,0.023846399784088135
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,fp8,1,0.025438401103019714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,bfloat16,3,0.023841600120067596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,fp8,3,0.025964799523353576
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,bfloat16,7,0.023937599360942842
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,bfloat16,bfloat16,63,0.9630687713623047
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,fp8,7,0.02595199942588806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,fp8,15,0.02600800096988678
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,bfloat16,fp8,15,2.334462356567383
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,bfloat16,15,0.024244800209999084
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,bfloat16,bfloat16,31,1.882521629333496
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,bfloat16,31,0.023849600553512575
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,bfloat16,63,0.024273599684238433
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,fp8,31,0.02574560046195984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,bfloat16,127,0.02447360008955002
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,fp8,63,0.02585279941558838
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,fp8,127,0.026473599672317504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,bfloat16,255,0.04190559983253479
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,fp8,255,0.03444800078868866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,bfloat16,511,0.06203680038452149
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,fp8,511,0.05153120160102844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,bfloat16,1023,0.10517120361328125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,fp8,1023,0.07704960107803345
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,bfloat16,bfloat16,63,1.8942720413208007
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,fp8,2047,0.1244047999382019
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,bfloat16,fp8,31,2.2996063232421875
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,bfloat16,1,0.0531216025352478
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,bfloat16,3,0.05334240198135376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,fp8,4095,0.22165920734405517
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,bfloat16,fp8,63,2.275016021728516
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,bfloat16,7,0.0533519983291626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,bfloat16,4095,0.3580127954483032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,fp8,1,0.06344000101089478
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,fp8,3,0.06217600107192993
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,fp8,31,0.06356319785118103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,fp8,15,0.06243039965629578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,bfloat16,15,0.052855998277664185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,bfloat16,31,0.053472000360488894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,bfloat16,63,0.05796480178833008
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,bfloat16,127,0.06155359745025635
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,bfloat16,255,0.09495999813079833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,fp8,127,0.07076159715652466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,fp8,255,0.09056640267372132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,bfloat16,511,0.1566848039627075
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,fp8,511,0.1345039963722229
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,fp8,1023,0.20853118896484374
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,fp8,2047,0.34535040855407717
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,bfloat16,2047,0.5343135833740235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,bfloat16,1,0.011998400092124939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,fp8,1,0.012559999525547028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,bfloat16,3,0.011856000125408172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,fp8,3,0.012619200348854064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,bfloat16,7,0.0119439996778965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,fp8,7,0.012646399438381195
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,bfloat16,15,0.011955200135707856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,fp8,15,0.012582400441169738
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,bfloat16,31,0.011822400242090225
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,fp8,4095,0.6604415893554687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,fp8,31,0.012596799433231354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,fp8,63,0.0124719999730587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,bfloat16,63,0.012179200351238251
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,bfloat16,127,0.011955200135707856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,bfloat16,255,0.013711999356746673
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,fp8,255,0.01462559998035431
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,bfloat16,511,0.016780799627304076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,bfloat16,bfloat16,2047,0.18977440595626832
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,fp8,511,0.018249599635601042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,fp8,1023,0.017214399576187134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,bfloat16,2047,0.019232000410556793
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,fp8,2047,0.019113600254058838
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,bfloat16,4095,0.03208959996700287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,fp8,4095,0.024751999974250795
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,fp8,7,0.06201440095901489
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,bfloat16,1,0.012185599654912949
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,fp8,63,0.06420159935951233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,bfloat16,3,0.012612800300121307
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,fp8,3,0.012987199425697326
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,bfloat16,7,0.012559999525547028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,fp8,7,0.012960000336170197
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,bfloat16,15,0.012558400630950928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,fp8,15,0.012928000092506409
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,bfloat16,31,0.012627199292182922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,fp8,31,0.01286720037460327
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,bfloat16,63,0.01244639977812767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,fp8,63,0.01289760023355484
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,bfloat16,127,0.012460800260305405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,bfloat16,1023,0.2813967943191528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,fp8,127,0.012921600043773651
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,bfloat16,255,0.014161600172519684
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,bfloat16,511,0.01693439930677414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,fp8,511,0.0189983993768692
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,fp8,255,0.014707200229167938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,bfloat16,1023,0.019377599656581878
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,fp8,1023,0.024057599902153014
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,bfloat16,2047,0.03799520134925842
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,fp8,2047,0.031060799956321716
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,bfloat16,4095,0.06220639944076538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,fp8,4095,0.05018240213394165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,bfloat16,1,0.03147520124912262
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,fp8,1,0.036822399497032164
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,bfloat16,3,0.030937600135803222
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,fp8,3,0.03575359880924225
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,bfloat16,7,0.032400000095367434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,fp8,7,0.03504480123519897
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,bfloat16,15,0.032451200485229495
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,fp8,15,0.035104000568389894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,bfloat16,31,0.03253119885921478
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,fp8,31,0.03511520028114319
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,fp8,63,0.03684960007667541
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,bfloat16,127,0.03690719902515412
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,fp8,127,0.03736959993839264
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,bfloat16,255,0.05542880296707153
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,fp8,255,0.051976001262664794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,bfloat16,511,0.0858240008354187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,fp8,511,0.07427520155906678
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,fp8,1023,0.11294080018997192
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,bfloat16,1023,0.15097919702529908
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,fp8,127,0.01297760009765625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,fp8,2047,0.1799936056137085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,bfloat16,2047,0.27688798904418943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,bfloat16,bfloat16,1023,0.016340799629688263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,bfloat16,1,0.01266079992055893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,bfloat16,3,0.012620800733566284
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,bfloat16,4095,0.5295872211456298
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,fp8,3,0.013439999520778656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,fp8,1,0.01321599930524826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,fp8,7,0.013752000033855438
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,bfloat16,15,0.012491200119256973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,fp8,4095,0.3288144111633301
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,fp8,15,0.013348799943923951
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,bfloat16,7,0.012243200093507767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,bfloat16,31,0.012375999987125397
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,bfloat16,fp8,1,0.01279519945383072
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,bfloat16,63,0.012624000012874604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,bfloat16,127,0.01247360035777092
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,fp8,63,0.013104000687599182
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,fp8,127,0.013459199666976928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,fp8,31,0.012987199425697326
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,bfloat16,255,0.014526399970054626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,fp8,255,0.01515520066022873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,fp8,511,0.019171200692653656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,bfloat16,511,0.01719360053539276
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,bfloat16,1023,0.03423359990119934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,fp8,1023,0.029974400997161865
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,bfloat16,2047,0.051712000370025636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,fp8,2047,0.044961598515510556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,bfloat16,4095,0.08589439988136291
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,bfloat16,fp8,4095,0.06951519846916199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,fp8,1,0.1178272008895874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,bfloat16,3,0.09803360104560851
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,bfloat16,7,0.0972111999988556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,bfloat16,15,0.09845439791679382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,fp8,15,0.11757919788360596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,bfloat16,31,0.09971839785575867
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,fp8,31,0.11756960153579712
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,bfloat16,63,0.1042464017868042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,fp8,63,0.12497919797897339
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,bfloat16,127,0.11036640405654907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,fp8,127,0.13449920415878297
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,bfloat16,bfloat16,63,0.03181599974632263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,bfloat16,255,0.1718127965927124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,fp8,255,0.17002559900283815
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,bfloat16,bfloat16,4095,1.0440128326416016
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,bfloat16,511,0.3012383937835693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,fp8,511,0.25947520732879636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,bfloat16,1023,0.5438191890716553
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,fp8,1023,0.4151311874389648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,bfloat16,1,0.18394880294799804
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,fp8,1,0.2250447988510132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,fp8,2047,0.687556791305542
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,bfloat16,3,0.18406879901885986
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,fp8,3,0.22487521171569824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,bfloat16,2047,1.046835231781006
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,fp8,7,0.22581920623779297
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,bfloat16,15,0.18356640338897706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,bfloat16,7,0.18431040048599243
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,bfloat16,31,0.1954543948173523
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,bfloat16,63,0.19673759937286378
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,bfloat16,1,0.09854239821434022
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,fp8,15,0.22312479019165038
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,fp8,3,0.11885600090026856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,fp8,63,0.24015839099884034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,bfloat16,fp8,7,0.11816639900207519
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,bfloat16,127,0.2047856092453003
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,fp8,127,0.25701920986175536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,bfloat16,255,0.3091056108474731
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,fp8,255,0.31722240447998046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,bfloat16,1,0.3502608060836792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,fp8,1,0.43563361167907716
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,bfloat16,3,0.35346720218658445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,fp8,511,0.49661760330200194
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,bfloat16,511,0.590718412399292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,bfloat16,7,0.35086081027984617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,fp8,3,0.4258399963378906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,fp8,7,0.4306335926055908
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,bfloat16,15,0.3710655927658081
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,fp8,15,0.4369071960449219
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,bfloat16,31,0.3703039884567261
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,bfloat16,63,0.37380480766296387
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,fp8,31,0.45917439460754395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,fp8,63,0.45471677780151365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,bfloat16,1,0.016121600568294526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,fp8,1,0.017369599640369417
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,fp8,127,0.49587039947509765
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,fp8,3,0.017396800220012665
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,bfloat16,7,0.016371199488639833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,fp8,7,0.01743520051240921
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,bfloat16,255,0.5952479839324951
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,bfloat16,15,0.016473600268363954
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,fp8,15,0.017479999363422392
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,fp8,31,0.0177839994430542
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,bfloat16,63,0.016256000101566314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,bfloat16,31,0.016569599509239197
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,fp8,255,0.6171887874603271
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,bfloat16,127,0.01643040031194687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,fp8,127,0.01791999936103821
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,bfloat16,255,0.020105600357055664
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,fp8,255,0.02147040069103241
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,bfloat16,511,0.03281280100345611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,fp8,511,0.028340798616409302
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,bfloat16,1023,0.05101119875907898
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,fp8,1023,0.046614399552345274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,bfloat16,2047,0.08516160249710084
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,fp8,2047,0.06955680251121521
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,bfloat16,4095,0.15404800176620484
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,fp8,4095,0.12042560577392578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,bfloat16,fp8,31,0.22698719501495362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,bfloat16,bfloat16,1,0.7005856037139893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,bfloat16,bfloat16,3,0.7052800178527832
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,bfloat16,fp8,1,0.8300080299377441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,bfloat16,fp8,3,0.8309951782226562
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,bfloat16,bfloat16,7,0.7176655769348145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,bfloat16,fp8,7,0.8365072250366211
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,bfloat16,bfloat16,15,0.7187392234802246
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,bfloat16,bfloat16,31,0.7225296020507812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,bfloat16,bfloat16,127,0.3925407886505127
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,bfloat16,fp8,15,0.8910079956054687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,bfloat16,3,0.015961599349975587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,bfloat16,bfloat16,63,0.7281824111938476
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,bfloat16,fp8,31,0.8823984146118165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,bfloat16,fp8,63,0.017843200266361235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,bfloat16,fp8,63,0.887553596496582
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,bfloat16,bfloat16,127,0.7648863792419434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,bfloat16,fp8,127,0.9514944076538085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,bfloat16,bfloat16,1,1.4152432441711427
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,bfloat16,bfloat16,3,1.4136575698852538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,bfloat16,fp8,1,1.7077760696411133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,bfloat16,bfloat16,7,1.4070704460144043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,bfloat16,fp8,3,1.7373327255249023
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,bfloat16,bfloat16,15,1.4164591789245606
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,bfloat16,1,0.01988160014152527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,bfloat16,fp8,7,1.7542816162109376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,fp8,1,0.021982400119304656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,fp8,3,0.022100800275802614
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,bfloat16,3,0.01979680061340332
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,bfloat16,7,0.020185600221157073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,fp8,7,0.021740800142288207
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,bfloat16,bfloat16,31,1.418614387512207
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,bfloat16,15,0.020193600654602052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,fp8,15,0.021745599806308746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,bfloat16,fp8,15,1.7202608108520507
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,bfloat16,31,0.019383999705314636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,fp8,31,0.02213920056819916
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,bfloat16,63,0.019814400374889372
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,bfloat16,127,0.019636799395084382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,fp8,63,0.02138720005750656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,bfloat16,255,0.03376159965991974
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,fp8,255,0.02709920108318329
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,bfloat16,511,0.04898560047149658
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,fp8,511,0.04163039922714233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,bfloat16,1023,0.08325920104980469
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,fp8,1023,0.06055999994277954
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,bfloat16,bfloat16,63,1.4202960014343262
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,bfloat16,2047,0.1457376003265381
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,fp8,2047,0.09560160040855407
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,bfloat16,3,0.03818239867687225
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,bfloat16,1,0.038227200508117676
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,fp8,4095,0.1725759983062744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,bfloat16,4095,0.2738048076629639
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,bfloat16,fp8,63,1.755945587158203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,fp8,1,0.04452320039272308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,fp8,3,0.0437391996383667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,bfloat16,31,0.03880000114440918
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,fp8,31,0.04374560117721558
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,bfloat16,63,0.039982399344444274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,fp8,15,0.04528000056743622
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,fp8,63,0.04386880099773407
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,bfloat16,127,0.04814240038394928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,fp8,127,0.04545120000839233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,bfloat16,255,0.07078400254249573
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,fp8,255,0.0639840006828308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,bfloat16,511,0.11162879467010497
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,fp8,511,0.09133599996566773
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,bfloat16,1023,0.19889600276947023
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,fp8,1023,0.14117439985275268
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,bfloat16,2047,0.36319520473480227
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,fp8,2047,0.24070239067077637
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,bfloat16,4095,0.7039008140563965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,fp8,4095,0.4336671829223633
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,bfloat16,1,0.011422400176525117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,fp8,1,0.012172800302505494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,bfloat16,3,0.011398400366306304
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,bfloat16,fp8,127,0.022171199321746826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,fp8,3,0.012281599640846252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,bfloat16,fp8,31,1.7080368041992187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,bfloat16,7,0.011552000045776367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,bfloat16,15,0.011590400338172912
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,bfloat16,8191,1.3842543601989745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,fp8,15,0.012187200039625168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,fp8,7,0.012003199756145477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,fp8,31,0.012252800166606903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,bfloat16,63,0.01154559999704361
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,bfloat16,31,0.012919999659061432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,fp8,63,0.012300799787044524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,bfloat16,127,0.01165120005607605
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,bfloat16,7,0.03852800130844116
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,bfloat16,255,0.013383999466896057
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,fp8,255,0.01395999938249588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,bfloat16,511,0.016225600242614747
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,fp8,511,0.017696000635623932
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,fp8,7,0.04565440118312836
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,bfloat16,1023,0.01563359946012497
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,bfloat16,2047,0.017688000202178956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,fp8,2047,0.01737920045852661
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,bfloat16,4095,0.021590399742126464
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,fp8,1023,0.01653279960155487
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,fp8,4095,0.020838400721549986
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,bfloat16,8191,0.037536001205444335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,bfloat16,1,0.011774399876594543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,fp8,1,0.012508800625801087
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,bfloat16,3,0.012115199863910676
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,fp8,3,0.01223360002040863
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,bfloat16,7,0.011952000111341477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,fp8,7,0.012588800489902496
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,bfloat16,15,0.011604800075292587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,fp8,15,0.012646399438381195
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,bfloat16,31,0.012031999975442886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,fp8,31,0.012240000069141388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,bfloat16,63,0.011903999745845795
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,fp8,63,0.012588800489902496
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,bfloat16,127,0.011814399808645248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,fp8,127,0.012542399764060973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,bfloat16,255,0.01372320055961609
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,fp8,255,0.014230400323867798
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,bfloat16,511,0.016735999286174773
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,fp8,511,0.01805119961500168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,bfloat16,1023,0.01728159934282303
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,fp8,1023,0.017004799842834473
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,bfloat16,2047,0.020136000216007234
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,fp8,2047,0.020505599677562714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,bfloat16,4095,0.03718560039997101
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,fp8,4095,0.02735520005226135
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,bfloat16,8191,0.05881440043449402
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,bfloat16,fp8,8191,0.04163039922714233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,bfloat16,1,0.024087999761104584
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,fp8,1,0.026347199082374574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,bfloat16,3,0.024188800156116484
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,fp8,3,0.026795199513435362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,bfloat16,7,0.02383680045604706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,fp8,7,0.026625600457191468
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,bfloat16,15,0.024099199473857878
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,fp8,15,0.026185598969459534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,bfloat16,31,0.024271999299526215
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,fp8,31,0.026956799626350402
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,bfloat16,63,0.02391040027141571
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,fp8,63,0.026716798543930054
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,bfloat16,127,0.025515198707580566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,fp8,127,0.02624480128288269
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,bfloat16,255,0.04144960045814514
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,fp8,255,0.03530080020427704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,bfloat16,511,0.06314880251884461
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,fp8,511,0.0517408013343811
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,bfloat16,1023,0.10767359733581543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,fp8,1023,0.07705119848251343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,bfloat16,2047,0.1924847960472107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,fp8,2047,0.125600004196167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,bfloat16,4095,0.36042079925537107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,fp8,4095,0.22364161014556885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,bfloat16,1,0.011910399794578553
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,fp8,1,0.012465599924325943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,fp8,8191,0.8355327606201172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,bfloat16,3,0.011939200013875962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,bfloat16,7,0.011956799775362015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,bfloat16,8191,0.702726411819458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,fp8,7,0.012606400251388549
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,fp8,3,0.012641599774360657
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,bfloat16,fp8,8191,0.42426562309265137
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,bfloat16,bfloat16,15,0.03842560052871704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,fp8,15,0.012587200105190276
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,bfloat16,15,0.012134400010108948
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,bfloat16,31,0.011992000043392181
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,fp8,31,0.012606400251388549
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,bfloat16,127,0.012276799976825714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,bfloat16,255,0.013803200423717498
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,fp8,8191,0.027535998821258546
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,bfloat16,511,0.016771200299263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,fp8,511,0.018457600474357606
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,bfloat16,1023,0.01982080042362213
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,fp8,1023,0.02115360051393509
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,bfloat16,2047,0.03718400001525879
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,fp8,2047,0.027004799246788024
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,bfloat16,4095,0.05908160209655762
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,fp8,4095,0.042052799463272096
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,bfloat16,8191,0.10183520317077636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,fp8,8191,0.06700639724731446
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,bfloat16,1,0.06946880221366883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,fp8,1,0.08276000022888183
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,bfloat16,3,0.06944800019264222
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,fp8,3,0.08266879916191101
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,bfloat16,7,0.06949120163917541
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,fp8,7,0.08263199925422668
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,bfloat16,15,0.0697376012802124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,fp8,15,0.08358240127563477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,fp8,31,0.0836031973361969
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,bfloat16,63,0.07560319900512695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,fp8,63,0.08709279894828796
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,bfloat16,127,0.08027200102806091
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,fp8,127,0.09446880221366882
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,bfloat16,fp8,127,0.013737599551677703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,fp8,63,0.012542399764060973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,bfloat16,255,0.12208319902420044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,fp8,255,0.11986240148544311
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,fp8,127,0.013094399869441987
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,fp8,255,0.01478559970855713
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,bfloat16,511,0.21080319881439208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,fp8,511,0.1782464027404785
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,fp8,1023,0.2811919927597046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,bfloat16,2047,0.7074528217315674
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,fp8,2047,0.46072959899902344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,bfloat16,1,0.12590559720993041
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,fp8,1,0.15285439491271974
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,bfloat16,3,0.12555999755859376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,fp8,3,0.15226399898529053
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,bfloat16,7,0.1256368041038513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,fp8,7,0.15245120525360106
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,bfloat16,4095,1.3943216323852539
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,bfloat16,15,0.12591520547866822
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,fp8,4095,0.8790351867675781
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,fp8,15,0.1527135968208313
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,bfloat16,31,0.06989759802818299
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,fp8,31,0.15281599760055542
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,fp8,63,0.16391520500183104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,bfloat16,63,0.14245920181274413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,bfloat16,127,0.14428800344467163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,fp8,127,0.170961594581604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,bfloat16,255,0.22099199295043945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,fp8,255,0.22475359439849854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,fp8,511,0.3371311902999878
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,bfloat16,511,0.4074079990386963
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,bfloat16,bfloat16,63,0.0119439996778965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,bfloat16,1,0.23872160911560059
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,fp8,1,0.2901087999343872
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,bfloat16,3,0.24100639820098876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,bfloat16,1023,0.7139776229858399
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,fp8,1023,0.5413631916046142
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,bfloat16,7,0.23877921104431152
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,fp8,3,0.295743989944458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,fp8,7,0.2905951976776123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,bfloat16,15,0.24527840614318847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,bfloat16,bfloat16,1023,0.3727407932281494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,fp8,31,0.3072783946990967
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,bfloat16,63,0.2607311964035034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,bfloat16,31,0.25370240211486816
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,fp8,15,0.29041600227355957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,fp8,63,0.3116863965988159
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,bfloat16,127,0.27527039051055907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,bfloat16,1,0.012443199753761292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,fp8,127,0.33263359069824217
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,bfloat16,3,0.012439999729394913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,bfloat16,7,0.012441600114107132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,fp8,1,0.013043199479579926
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,fp8,3,0.013044799864292144
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,fp8,7,0.013107199966907502
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,bfloat16,15,0.012518399953842163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,fp8,15,0.013023999333381654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,fp8,255,0.42759199142456056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,bfloat16,31,0.012510399520397186
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,fp8,31,0.013012799620628356
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,bfloat16,63,0.012382400035858155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,bfloat16,127,0.012727999687194824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,fp8,63,0.013017599284648896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,fp8,127,0.013014400005340576
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,bfloat16,255,0.01430879980325699
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,fp8,255,0.015169599652290344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,bfloat16,511,0.017566399276256563
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,fp8,511,0.018713599443435668
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,fp8,511,0.6571631908416748
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,bfloat16,1023,0.036652800440788266
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,bfloat16,511,0.7883503913879395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,fp8,1023,0.027158400416374205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,bfloat16,2047,0.05813599824905395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,fp8,2047,0.04067200124263763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,fp8,4095,0.06694080233573914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,bfloat16,4095,0.10247999429702759
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,fp8,8191,0.11568319797515869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,bfloat16,bfloat16,8191,0.20684959888458251
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,bfloat16,1,0.4613167762756348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,bfloat16,bfloat16,31,0.13989919424057007
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,fp8,1,0.5593311786651611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,bfloat16,3,0.46549282073974607
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,bfloat16,7,0.4688704013824463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,fp8,3,0.5681920051574707
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,fp8,7,0.558190393447876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,bfloat16,15,0.48436799049377444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,bfloat16,31,0.4880256175994873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,fp8,15,0.5878687858581543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,fp8,31,0.604041576385498
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,bfloat16,63,0.4926127910614014
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,fp8,63,0.5998047828674317
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,bfloat16,127,0.522273588180542
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,fp8,127,0.6284063816070556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,bfloat16,255,0.784339189529419
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,bfloat16,bfloat16,1,0.9390527725219726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,bfloat16,fp8,255,0.8186976432800293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,bfloat16,bfloat16,3,0.9462688446044922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,bfloat16,fp8,1,1.111076831817627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,bfloat16,bfloat16,255,0.4069727897644043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,bfloat16,fp8,3,1.1066224098205566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,bfloat16,bfloat16,7,0.9669936180114747
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,bfloat16,bfloat16,15,0.9475760459899902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,bfloat16,fp8,7,1.1300607681274415
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,bfloat16,bfloat16,31,0.9655664443969727
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,bfloat16,fp8,15,1.1792367935180663
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,bfloat16,1,0.016651199758052827
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,fp8,1,0.017385600507259368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,bfloat16,3,0.01663520038127899
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,fp8,3,0.01754239946603775
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,bfloat16,bfloat16,63,0.9626064300537109
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,bfloat16,7,0.01664000004529953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,fp8,7,0.017507199943065644
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,fp8,15,0.017819200456142426
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,bfloat16,31,0.016284799575805663
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,bfloat16,15,0.015761600434780122
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,fp8,31,0.017752000689506532
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,bfloat16,63,0.01597920060157776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,fp8,63,0.016926400363445282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,bfloat16,127,0.016939200460910797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,bfloat16,fp8,63,1.175489616394043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,fp8,127,0.01695519983768463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,bfloat16,255,0.02014079988002777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,bfloat16,bfloat16,127,1.018017578125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,fp8,511,0.028300800919532777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,bfloat16,511,0.037339198589324954
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,bfloat16,1023,0.06072160005569458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,bfloat16,fp8,127,1.1948063850402832
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,fp8,2047,0.0680895984172821
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,bfloat16,2047,0.10432959794998169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,bfloat16,4095,0.18771040439605713
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,fp8,4095,0.11797920465469361
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,bfloat16,1,0.031595200300216675
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,bfloat16,3,0.031641599535942075
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,fp8,8191,0.21499199867248536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,bfloat16,8191,0.3607568025588989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,bfloat16,7,0.03130080103874207
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,fp8,1,0.03585920035839081
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,bfloat16,15,0.031350401043891904
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,fp8,15,0.03588640093803406
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,bfloat16,31,0.03139199912548065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,fp8,7,0.03635199964046478
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,fp8,31,0.03589119911193848
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,bfloat16,63,0.03143360018730164
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,bfloat16,127,0.03890720009803772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,bfloat16,255,0.058462399244308474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,fp8,255,0.05145919919013977
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,bfloat16,511,0.08806239962577819
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,fp8,511,0.0731872022151947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,bfloat16,1023,0.15474239587783814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,fp8,1023,0.1111680030822754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,bfloat16,2047,0.2795151948928833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,fp8,2047,0.18073439598083496
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,fp8,255,0.02191520035266876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,fp8,4095,0.32842240333557127
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,bfloat16,4095,0.5313216209411621
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,bfloat16,fp8,1023,0.0472784012556076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,bfloat16,1,0.011939200013875962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,bfloat16,3,0.01207360029220581
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,fp8,1,0.012483199685811996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,bfloat16,7,0.011699199676513672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,fp8,3,0.012772800028324127
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,fp8,7,0.012529599666595458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,bfloat16,15,0.01210559979081154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,fp8,15,0.01252480000257492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,bfloat16,31,0.012110400199890136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,fp8,3,0.035464000701904294
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,fp8,31,0.01268479973077774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,bfloat16,63,0.012081599980592727
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,fp8,8191,0.6380112171173096
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,bfloat16,127,0.012017600238323212
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,fp8,127,0.012772800028324127
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,fp8,255,0.014585599303245544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,bfloat16,511,0.01668799966573715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,fp8,63,0.036374399065971376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,fp8,511,0.018326400220394133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,bfloat16,1023,0.01725279986858368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,fp8,1023,0.017820799350738527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,bfloat16,2047,0.01748320013284683
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,fp8,127,0.03613280057907105
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,bfloat16,4095,0.02014559954404831
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,fp8,4095,0.020337599515914916
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,bfloat16,8191,0.03325600028038025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,fp8,8191,0.02542240023612976
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,fp8,1,0.01318880021572113
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,bfloat16,3,0.011868800222873687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,fp8,3,0.012572799623012543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,bfloat16,7,0.011902400106191636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,fp8,7,0.012678399682044983
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,bfloat16,15,0.012067200243473053
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,fp8,15,0.012873600423336028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,bfloat16,31,0.01213119998574257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,fp8,31,0.01276479959487915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,bfloat16,63,0.012163200229406358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,fp8,63,0.012824000418186187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,bfloat16,127,0.01207199990749359
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,fp8,127,0.01287200003862381
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,bfloat16,fp8,31,1.1746399879455567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,bfloat16,255,0.013920000195503235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,bfloat16,511,0.01695519983768463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,fp8,511,0.018331199884414673
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,fp8,255,0.014752000570297241
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,bfloat16,1023,0.017312000691890716
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,fp8,1023,0.018462400138378143
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,bfloat16,2047,0.02051360011100769
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,fp8,2047,0.020369599759578704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,bfloat16,4095,0.03269279897212982
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,fp8,4095,0.02610880136489868
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,bfloat16,8191,0.050019198656082155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,fp8,8191,0.03900319933891296
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,bfloat16,1,0.020443199574947356
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,fp8,1,0.02250239998102188
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,bfloat16,3,0.020420800149440765
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,fp8,3,0.022475199401378633
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,bfloat16,7,0.020363199710845947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,fp8,7,0.022536000609397887
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,bfloat16,15,0.020313599705696107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,fp8,15,0.022563199698925018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,bfloat16,31,0.020420800149440765
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,fp8,31,0.022609600424766542
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,bfloat16,63,0.020399999618530274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,fp8,63,0.02263679951429367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,bfloat16,127,0.02080959975719452
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,fp8,127,0.022705599665641785
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,fp8,63,0.012228800356388092
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,bfloat16,255,0.034971201419830324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,fp8,255,0.028585600852966308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,fp8,511,0.04227679967880249
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,bfloat16,bfloat16,8191,1.0413552284240724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,bfloat16,1023,0.08568959832191467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,fp8,1023,0.06341760158538819
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,fp8,2047,0.018427200615406036
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,fp8,2047,0.09949600100517272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,bfloat16,bfloat16,1,0.012144000083208085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,bfloat16,2047,0.17123839855194092
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,fp8,4095,0.17461760044097902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,bfloat16,4095,0.2749295949935913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,bfloat16,1,0.012608000636100769
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,fp8,1,0.013257600367069244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,bfloat16,3,0.012319999933242797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,bfloat16,7,0.01241919994354248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,fp8,3,0.013225600123405457
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,fp8,7,0.013191999495029449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,bfloat16,15,0.012486399710178375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,fp8,15,0.013260799646377563
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,fp8,8191,0.3232464075088501
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,bfloat16,31,0.012230399996042252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,fp8,31,0.013515199720859527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,bfloat16,63,0.012455999851226807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,bfloat16,127,0.01223199963569641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,bfloat16,8191,0.5308144092559814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,bfloat16,255,0.014251199364662171
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,fp8,63,0.013167999684810638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,fp8,255,0.014825600385665893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,fp8,511,0.018863999843597413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,bfloat16,1023,0.02054080069065094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,bfloat16,2047,0.039827200770378116
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,fp8,2047,0.03213599920272827
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,bfloat16,4095,0.06375679969787598
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,fp8,4095,0.05400480031967163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,bfloat16,8191,0.10906879901885987
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,fp8,8191,0.0879967987537384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,bfloat16,1,0.05568000078201294
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,fp8,1,0.06476640105247497
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,bfloat16,3,0.0557807981967926
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,fp8,3,0.06480000019073487
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,bfloat16,7,0.05571039915084839
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,fp8,7,0.06477439999580384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,bfloat16,15,0.055852800607681274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,fp8,15,0.06487200260162354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,bfloat16,31,0.055615997314453124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,fp8,31,0.06478880047798156
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,bfloat16,63,0.0605567991733551
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,fp8,63,0.06492000222206115
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,bfloat16,127,0.06998400092124939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,fp8,127,0.0729632019996643
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,bfloat16,255,0.10329920053482056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,fp8,255,0.0933023989200592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,bfloat16,bfloat16,511,0.050488001108169554
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,fp8,511,0.13614399433135987
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,bfloat16,bfloat16,255,0.013659200072288514
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,fp8,1023,0.21145761013031006
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,bfloat16,1023,0.2858671903610229
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,bfloat16,2047,0.5379312038421631
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,fp8,2047,0.35238239765167234
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,fp8,127,0.013307200372219085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,bfloat16,1,0.09997439980506898
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,fp8,1023,0.024516800045967103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,bfloat16,3,0.09855039715766907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,fp8,3,0.11904000043869019
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,bfloat16,4095,1.0452112197875976
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,fp8,4095,0.6558144092559814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,bfloat16,7,0.09824159741401672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,fp8,15,0.11923359632492066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,bfloat16,31,0.10058399438858032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,fp8,31,0.11921919584274292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,bfloat16,63,0.10697920322418213
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,fp8,63,0.12890080213546753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,bfloat16,127,0.12332320213317871
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,fp8,127,0.13540159463882445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,bfloat16,255,0.19482719898223877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,fp8,255,0.17260639667510985
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,bfloat16,511,0.30970718860626223
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,fp8,511,0.2633424043655396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,fp8,1023,0.4118207931518555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,bfloat16,1023,0.5439104080200196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,bfloat16,bfloat16,511,0.017080000042915343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,bfloat16,1,0.18462560176849366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,fp8,1,0.22175519466400145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,bfloat16,3,0.18580000400543212
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,fp8,3,0.22396960258483886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,bfloat16,7,0.1839743971824646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,fp8,7,0.2245568037033081
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,bfloat16,15,0.18600800037384033
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,fp8,15,0.22283360958099366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,bfloat16,15,0.09944480061531066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,bfloat16,31,0.19645919799804687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,fp8,31,0.2289520025253296
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,bfloat16,63,0.19784640073776244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,fp8,63,0.2430720090866089
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,bfloat16,127,0.21224000453948974
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,fp8,127,0.25774240493774414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,bfloat16,255,0.3696896076202393
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,fp8,255,0.3239680051803589
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,bfloat16,1,0.012204799801111221
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,fp8,1,0.01353600025177002
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,bfloat16,3,0.012198399752378464
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,fp8,3,0.013476799428462981
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,bfloat16,7,0.012190400063991547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,bfloat16,511,0.6060272216796875
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,fp8,7,0.01348000019788742
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,bfloat16,15,0.01228799968957901
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,bfloat16,bfloat16,511,0.1619968056678772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,bfloat16,fp8,511,0.49567041397094724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,fp8,31,0.013076800107955932
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,fp8,15,0.013344000279903411
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,bfloat16,63,0.012891200184822083
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,bfloat16,127,0.012427199631929398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,bfloat16,255,0.014699199795722961
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,fp8,127,0.013395200669765472
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,fp8,255,0.015065599977970124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,bfloat16,511,0.01786399930715561
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,fp8,511,0.01883520036935806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,bfloat16,1023,0.03578239977359772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,fp8,1023,0.03097440004348755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,bfloat16,2047,0.05284479856491089
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,fp8,2047,0.04531359970569611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,bfloat16,4095,0.08706079721450806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,fp8,4095,0.07252960205078125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,bfloat16,8191,0.1562543988227844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,fp8,8191,0.12157440185546875
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,bfloat16,1,0.35223519802093506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,fp8,1,0.4312863826751709
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,bfloat16,3,0.3512160062789917
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,bfloat16,7,0.35277280807495115
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,fp8,1,0.12120800018310547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,bfloat16,15,0.3670016050338745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,fp8,7,0.4318240165710449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,fp8,15,0.429201602935791
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,bfloat16,31,0.3771775960922241
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,bfloat16,fp8,7,0.11911840438842773
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,fp8,31,0.4548384189605713
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,bfloat16,63,0.38190879821777346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,fp8,63,0.45832161903381347
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,bfloat16,127,0.40580158233642577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,fp8,127,0.47730240821838377
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,bfloat16,255,0.7059279918670655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,fp8,255,0.6270720005035401
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,bfloat16,bfloat16,1,0.7012464046478272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,bfloat16,31,0.012636800110340119
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,bfloat16,fp8,1,0.844598388671875
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,bfloat16,bfloat16,3,0.7067647933959961
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,bfloat16,fp8,63,0.013495999574661254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,bfloat16,bfloat16,7,0.7134880065917969
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,bfloat16,fp8,7,0.8335727691650391
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,bfloat16,bfloat16,15,0.7244224071502685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,bfloat16,bfloat16,31,0.722327995300293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,bfloat16,fp8,15,0.8880703926086426
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,bfloat16,1,0.016097599267959596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,bfloat16,fp8,31,0.8964816093444824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,fp8,1,0.017427200078964235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,bfloat16,3,0.01637600064277649
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,fp8,3,0.01743199974298477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,bfloat16,7,0.016331200301647187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,fp8,7,0.0174127995967865
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,bfloat16,fp8,3,0.4203936100006104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,bfloat16,15,0.01640480011701584
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,fp8,15,0.017297600209712983
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,bfloat16,fp8,63,0.8924448013305664
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,bfloat16,31,0.016385599970817566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,fp8,31,0.017294399440288544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,bfloat16,bfloat16,127,0.7797776222229004
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,fp8,63,0.01783200055360794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,bfloat16,127,0.015990400314331056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,bfloat16,63,0.016147199273109435
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,bfloat16,255,0.019996799528598785
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,bfloat16,511,0.03205600082874298
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,bfloat16,fp8,127,0.920406436920166
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,fp8,511,0.029046401381492615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,bfloat16,1023,0.050672000646591185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,fp8,1023,0.046372801065444946
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,bfloat16,2047,0.08528800010681152
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,fp8,4095,0.12311359643936157
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,bfloat16,4095,0.15382080078125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,fp8,8191,0.22376160621643065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,bfloat16,8191,0.2922976016998291
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,bfloat16,1,0.024217599630355836
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,bfloat16,3,0.024265600740909575
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,fp8,1,0.02764959931373596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,fp8,3,0.027444800734519957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,fp8,7,0.0274399995803833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,bfloat16,15,0.024353599548339842
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,fp8,15,0.02749119997024536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,bfloat16,31,0.024347199499607085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,fp8,31,0.027529600262641906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,bfloat16,fp8,3,0.827244758605957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,bfloat16,63,0.024619199335575104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,bfloat16,127,0.025435200333595274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,fp8,127,0.027492800354957582
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,fp8,63,0.02709439992904663
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,bfloat16,255,0.043075200915336606
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,fp8,255,0.03548319935798645
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,bfloat16,511,0.06332319974899292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,fp8,511,0.05222079753875732
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,bfloat16,bfloat16,63,0.7284976005554199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,bfloat16,1023,0.10846879482269287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,bfloat16,2047,0.1941104054450989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,fp8,2047,0.12659039497375488
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,fp8,255,0.021300800144672394
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,fp8,4095,0.22653601169586182
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,bfloat16,4095,0.36210720539093016
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,fp8,2047,0.0684112012386322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,bfloat16,8191,0.7051119804382324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,fp8,8191,0.4272784233093262
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,bfloat16,1,0.011428800225257874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,fp8,1,0.012296000123023986
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,bfloat16,3,0.011475200206041336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,fp8,3,0.012240000069141388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,bfloat16,7,0.011449600011110306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,fp8,7,0.012350399792194367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,bfloat16,15,0.011446399986743927
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,fp8,15,0.01233920007944107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,bfloat16,31,0.011403200030326844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,bfloat16,7,0.024846400320529937
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,fp8,31,0.012329600006341934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,bfloat16,63,0.011406400054693223
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,fp8,16383,0.8084815979003906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,bfloat16,127,0.0115167997777462
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,fp8,63,0.012415999919176102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,fp8,255,0.01422239989042282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,bfloat16,511,0.016312000155448914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,fp8,127,0.012107200175523757
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,bfloat16,255,0.013156799972057343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,fp8,511,0.017969599366188048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,bfloat16,1023,0.01600479930639267
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,fp8,1023,0.016543999314308167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,fp8,2047,0.017103999853134155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,bfloat16,4095,0.018408000469207764
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,bfloat16,16383,1.3892687797546386
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,bfloat16,2047,0.016271999478340148
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,fp8,4095,0.01774719953536987
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,bfloat16,8191,0.022275200486183165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,fp8,8191,0.022015999257564544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,bfloat16,1,0.011577600240707397
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,fp8,16383,0.027816000580787658
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,bfloat16,3,0.011526399850845337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,fp8,3,0.012723200023174286
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,fp8,7,0.012427199631929398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,bfloat16,15,0.011673600226640702
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,fp8,15,0.01271039992570877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,bfloat16,31,0.011753600090742111
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,bfloat16,63,0.01183520033955574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,fp8,63,0.01236959993839264
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,bfloat16,127,0.011830399930477142
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,fp8,31,0.01231200024485588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,fp8,127,0.012348800152540206
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,bfloat16,255,0.013507199287414551
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,fp8,255,0.014417600631713868
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,fp8,511,0.0182559996843338
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,bfloat16,1023,0.016011199355125426
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,bfloat16,fp8,1023,0.07793759703636169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,fp8,1023,0.01717119961977005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,bfloat16,2047,0.017955200374126436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,fp8,2047,0.01703840047121048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,bfloat16,4095,0.021939200162887574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,fp8,4095,0.021201600134372712
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,bfloat16,8191,0.037448000907897946
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,fp8,8191,0.02732959985733032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,bfloat16,16383,0.05921120047569275
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,fp8,16383,0.04254559874534607
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,bfloat16,1,0.016518400609493257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,fp8,1,0.017975999414920805
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,bfloat16,3,0.016259199380874632
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,fp8,3,0.017972800135612487
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,bfloat16,7,0.01624480038881302
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,fp8,7,0.017960000038146972
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,bfloat16,15,0.01626719981431961
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,fp8,15,0.017980800569057466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,bfloat16,31,0.016196799278259278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,fp8,31,0.01800000071525574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,bfloat16,63,0.016281600296497344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,fp8,63,0.017985600233078002
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,bfloat16,127,0.016412800550460814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,fp8,127,0.01799360066652298
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,bfloat16,255,0.020367999374866486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,fp8,255,0.021902400255203246
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,bfloat16,511,0.03645119965076447
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,fp8,511,0.02940959930419922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,bfloat16,1023,0.059956800937652585
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,fp8,1023,0.046025601029396054
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,fp8,2047,0.07039200067520142
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,bfloat16,2047,0.10410399436950683
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,fp8,4095,0.11906559467315674
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,bfloat16,4095,0.1884160041809082
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,fp8,8191,0.21884000301361084
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,bfloat16,8191,0.3620879888534546
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,fp8,1,0.01242400035262108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,bfloat16,bfloat16,16383,0.04104639887809754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,bfloat16,1,0.011950399726629257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,bfloat16,3,0.011812800168991089
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,fp8,3,0.012611199915409089
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,bfloat16,7,0.011785600334405899
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,fp8,1,0.012673600018024445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,bfloat16,511,0.016264000535011293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,bfloat16,15,0.011840000003576278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,fp8,16383,0.4116799831390381
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,fp8,15,0.01265760064125061
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,bfloat16,63,0.011791999638080596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,bfloat16,bfloat16,16383,0.7054895877838134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,fp8,31,0.012441600114107132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,fp8,63,0.012668800354003907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,fp8,127,0.012513600289821625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,bfloat16,255,0.013635200262069703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,bfloat16,127,0.012012799829244613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,fp8,255,0.014443199336528777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,bfloat16,511,0.016700799763202667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,bfloat16,fp8,127,0.01786080002784729
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,fp8,511,0.018241600692272188
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,bfloat16,1023,0.017407999932765962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,fp8,1023,0.017158399522304534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,bfloat16,2047,0.019865599274635316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,fp8,4095,0.02728799879550934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,bfloat16,4095,0.03725920021533966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,bfloat16,8191,0.05889279842376709
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,fp8,8191,0.042803201079368594
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,bfloat16,16383,0.1023743987083435
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,bfloat16,1,0.04103519916534424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,fp8,16383,0.06685760021209716
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,bfloat16,3,0.04102399945259094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,bfloat16,7,0.04091359972953797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,fp8,3,0.04694719910621643
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,fp8,7,0.04715040028095245
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,bfloat16,15,0.04043200016021729
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,fp8,15,0.047260800004005434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,bfloat16,31,0.04051679968833923
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,fp8,31,0.0473008006811142
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,bfloat16,63,0.04165599942207336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,fp8,63,0.04724799990653992
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,bfloat16,127,0.04892480075359344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,fp8,127,0.048283201456069944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,bfloat16,255,0.07337440252304077
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,fp8,255,0.06621440052986145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,bfloat16,511,0.11344319581985474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,fp8,511,0.09576640129089356
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,bfloat16,1023,0.20234880447387696
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,fp8,1023,0.14690079689025878
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,bfloat16,bfloat16,7,0.011547199636697768
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,fp8,2047,0.24218719005584716
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,bfloat16,2047,0.3715807914733887
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,fp8,7,0.012503999471664428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,bfloat16,31,0.011854399740695954
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,bfloat16,1,0.07165120244026184
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,fp8,1,0.08481760025024414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,bfloat16,4095,0.7062751770019531
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,fp8,4095,0.44268321990966797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,bfloat16,3,0.07133280038833618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,bfloat16,fp8,2047,0.020878399908542632
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,fp8,7,0.08481600284576415
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,fp8,3,0.08405439853668213
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,bfloat16,15,0.07217119932174683
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,fp8,15,0.08488960266113281
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,bfloat16,31,0.07088159918785095
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,fp8,31,0.08528159856796265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,fp8,63,0.08537920117378235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,fp8,1,0.04655199944972992
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,bfloat16,127,0.08072320222854615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,fp8,8191,0.8445615768432617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,bfloat16,255,0.1288864016532898
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,fp8,255,0.119159996509552
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,bfloat16,bfloat16,8191,1.386467170715332
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,fp8,511,0.1798624038696289
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,bfloat16,511,0.2329200029373169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,bfloat16,1023,0.37390239238739015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,fp8,1023,0.30482399463653564
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,bfloat16,1,0.12947360277175904
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,bfloat16,3,0.12954720258712768
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,fp8,2047,0.47347521781921387
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,fp8,1,0.162608003616333
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,bfloat16,2047,0.7141791820526123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,fp8,3,0.15626239776611328
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,fp8,7,0.1627616047859192
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,bfloat16,7,0.12874399423599242
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,fp8,15,0.15783040523529052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,bfloat16,15,0.1299056053161621
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,bfloat16,31,0.14152640104293823
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,fp8,31,0.15582879781723022
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,bfloat16,63,0.14063680171966553
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,bfloat16,127,0.1552575945854187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,fp8,63,0.1671455979347229
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,fp8,127,0.1749776005744934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,bfloat16,255,0.22700800895690917
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,fp8,255,0.22715039253234864
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,fp8,511,0.34098238945007325
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,bfloat16,511,0.4140768051147461
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,bfloat16,1,0.01202400028705597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,fp8,1,0.013033600151538849
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,bfloat16,3,0.012031999975442886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,bfloat16,7,0.07111999988555909
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,bfloat16,7,0.012118399888277055
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,bfloat16,15,0.012092799693346024
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,fp8,15,0.012892800569534301
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,fp8,1023,0.5512976169586181
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,bfloat16,bfloat16,1023,0.7229663848876953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,bfloat16,31,0.012167999893426895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,fp8,63,0.012899200618267059
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,fp8,127,0.09625599980354309
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,bfloat16,63,0.012217599898576736
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,fp8,31,0.013219200074672699
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,bfloat16,127,0.012188799679279327
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,fp8,127,0.013036799430847169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,bfloat16,511,0.016961599886417388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,bfloat16,255,0.013900800049304963
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,fp8,511,0.018649600446224213
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,bfloat16,1023,0.01963520050048828
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,fp8,1023,0.021113599836826324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,bfloat16,2047,0.03715519905090332
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,fp8,2047,0.027276799082756042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,bfloat16,4095,0.05951840281486511
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,fp8,4095,0.042208001017570496
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,fp8,8191,0.06747999787330627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,bfloat16,16383,0.18862719535827638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,fp8,16383,0.1182528018951416
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,bfloat16,1,0.24501121044158936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,fp8,1,0.295963191986084
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,bfloat16,3,0.24419040679931642
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,fp8,3,0.2953200101852417
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,bfloat16,7,0.24086399078369142
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,fp8,7,0.29489281177520754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,bfloat16,15,0.24264800548553467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,fp8,15,0.29512801170349123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,bfloat16,31,0.2551071882247925
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,fp8,31,0.31248159408569337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,bfloat16,63,0.25734400749206543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,fp8,3,0.012929600477218629
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,fp8,7,0.01311040073633194
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,fp8,63,0.31651198863983154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,bfloat16,127,0.27596321105957033
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,bfloat16,bfloat16,63,0.07816320061683654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,fp8,127,0.3299743890762329
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,fp8,255,0.0151296004652977
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,bfloat16,255,0.4203199863433838
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,fp8,255,0.42579197883605957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,bfloat16,1,0.4697807788848877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,fp8,1,0.5650335788726807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,bfloat16,bfloat16,8191,0.10190080404281616
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,bfloat16,3,0.47148799896240234
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,fp8,511,0.6649263858795166
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,bfloat16,bfloat16,511,0.8104880332946778
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,bfloat16,7,0.47508797645568845
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,fp8,3,0.5646495819091797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,fp8,7,0.5685023784637451
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,bfloat16,15,0.4893775939941406
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,fp8,15,0.5753632068634034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,bfloat16,31,0.4970863819122314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,bfloat16,1,0.012452799826860428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,fp8,1,0.013425600528717042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,fp8,31,0.6046656131744385
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,bfloat16,3,0.012588800489902496
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,bfloat16,127,0.5333280086517334
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,bfloat16,7,0.012484800070524216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,fp8,3,0.012987199425697326
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,bfloat16,15,0.012476799637079239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,fp8,7,0.013291199505329133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,bfloat16,31,0.012518399953842163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,fp8,31,0.013155199587345123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,bfloat16,63,0.012521600723266602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,fp8,63,0.013380800187587739
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,bfloat16,255,0.8067263603210449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,bfloat16,127,0.012745599448680877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,bfloat16,255,0.014636799693107605
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,fp8,127,0.013094399869441987
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,fp8,255,0.015532800555229187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,bfloat16,511,0.01763039976358414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,fp8,511,0.019070400297641753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,bfloat16,1023,0.036643201112747194
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,bfloat16,2047,0.058475202322006224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,fp8,2047,0.04279040098190308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,bfloat16,4095,0.10145759582519531
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,fp8,4095,0.06697279810905457
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,bfloat16,8191,0.18738399744033812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,fp8,8191,0.11832640171051026
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,bfloat16,16383,0.3600847959518433
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,fp8,16383,0.21589279174804688
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,bfloat16,63,0.4966464042663574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,bfloat16,1,0.017319999635219574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,fp8,63,0.603988790512085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,bfloat16,3,0.017203199863433837
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,bfloat16,7,0.017209599912166595
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,fp8,127,0.6217711925506592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,fp8,1,0.019097599387168884
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,bfloat16,15,0.017047999799251555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,fp8,15,0.019123199582099914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,bfloat16,31,0.017076799273490907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,fp8,31,0.019144000113010408
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,bfloat16,63,0.017076799273490907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,fp8,63,0.01905120015144348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,bfloat16,127,0.016896000504493712
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,bfloat16,fp8,255,0.8240127563476562
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,fp8,127,0.019057600200176238
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,bfloat16,255,0.020771199464797975
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,bfloat16,511,0.037880000472068784
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,fp8,1023,0.02693440020084381
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,fp8,255,0.022100800275802614
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,bfloat16,2047,0.10530719757080079
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,fp8,1023,0.0451119989156723
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,fp8,2047,0.06967039704322815
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,bfloat16,4095,0.19062880277633668
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,fp8,4095,0.11818079948425293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,fp8,8191,0.2174191951751709
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,bfloat16,8191,0.3611664056777954
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,fp8,16383,0.41599202156066895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,bfloat16,16383,0.703985595703125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,bfloat16,1,0.011531200259923935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,fp8,1,0.01223199963569641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,bfloat16,fp8,15,0.013078400492668152
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,bfloat16,3,0.011563199758529662
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,bfloat16,7,0.011561600118875503
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,fp8,3,0.018667200207710268
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,fp8,7,0.012390399724245072
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,fp8,3,0.01242400035262108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,bfloat16,15,0.011374399811029435
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,fp8,15,0.012388800084590913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,bfloat16,31,0.011548800021409988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,bfloat16,63,0.011472000181674958
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,fp8,63,0.01226240023970604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,bfloat16,127,0.011564800143241882
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,fp8,127,0.012321600317955017
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,bfloat16,255,0.013064000010490417
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,fp8,255,0.014268800616264343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,bfloat16,511,0.016100800037384032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,fp8,511,0.018199999630451203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,bfloat16,1023,0.01671839952468872
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,fp8,1023,0.018006399273872375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,fp8,511,0.030206400156021117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,bfloat16,1023,0.061585599184036256
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,bfloat16,2047,0.016868799924850464
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,fp8,2047,0.017953599989414214
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,bfloat16,4095,0.01717440038919449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,bfloat16,8191,0.01907680034637451
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,fp8,4095,0.018457600474357606
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,fp8,8191,0.01895039975643158
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,bfloat16,16383,0.022753599286079406
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,bfloat16,32767,1.4003328323364257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,fp8,16383,0.022411200404167175
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,bfloat16,32767,0.03897280097007751
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,fp8,32767,0.03134079873561859
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,bfloat16,1,0.011475200206041336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,bfloat16,3,0.011524800211191177
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,fp8,7,0.012243200093507767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,fp8,3,0.012467200309038163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,fp8,1,0.01252640038728714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,bfloat16,15,0.011486399918794632
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,fp8,15,0.012227199971675873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,bfloat16,31,0.0117296002805233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,bfloat16,63,0.01149120032787323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,fp8,31,0.013315199315547943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,bfloat16,127,0.01188800036907196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,fp8,63,0.01220960021018982
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,fp8,127,0.012270399928092956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,fp8,255,0.014511999487876893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,bfloat16,511,0.016118399798870087
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,bfloat16,255,0.014291200041770934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,fp8,511,0.01786080002784729
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,bfloat16,1023,0.017153599858283998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,fp8,1023,0.017836800217628478
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,fp8,2047,0.018004800379276275
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,bfloat16,4095,0.01880960017442703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,bfloat16,2047,0.018035200238227845
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,fp8,4095,0.01852799952030182
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,bfloat16,8191,0.022259199619293214
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,fp8,8191,0.02319519966840744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,bfloat16,16383,0.03821280002593994
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,fp8,16383,0.02871519923210144
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,bfloat16,32767,0.061686402559280394
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,fp8,32767,0.04434239864349365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,fp8,1,0.014103999733924866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,bfloat16,3,0.012724800407886505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,fp8,3,0.014123199880123139
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,bfloat16,7,0.012652799487113953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,fp8,7,0.013942399621009826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,bfloat16,15,0.012615999579429627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,fp8,15,0.013984000682830811
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,bfloat16,31,0.012625600397586822
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,fp8,31,0.014084799587726593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,bfloat16,63,0.012697599828243256
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,fp8,63,0.014044800400733947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,fp8,127,0.01409280002117157
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,bfloat16,255,0.015216000378131866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,fp8,255,0.016040000319480895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,bfloat16,511,0.01865759938955307
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,fp8,511,0.019648000597953796
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,bfloat16,1023,0.03675520122051239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,fp8,1023,0.02727839946746826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,bfloat16,fp8,31,0.012225600332021714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,bfloat16,2047,0.059987199306488034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,fp8,2047,0.04127199947834015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,bfloat16,4095,0.10314400196075439
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,fp8,4095,0.06624159812927247
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,fp8,32767,0.8018879890441895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,bfloat16,8191,0.18946399688720703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,fp8,8191,0.1167072057723999
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,fp8,16383,0.21601760387420654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,bfloat16,bfloat16,7,0.011606399714946748
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,bfloat16,16383,0.3647423982620239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,bfloat16,1,0.011721599847078323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,bfloat16,3,0.011750400066375732
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,fp8,1,0.012787200510501862
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,fp8,3,0.01257600039243698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,fp8,7,0.012417600303888322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,bfloat16,7,0.01189119964838028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,bfloat16,15,0.011785600334405899
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,fp8,15,0.01271039992570877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,bfloat16,31,0.0117807999253273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,fp8,31,0.012723200023174286
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,bfloat16,63,0.01178240031003952
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,fp8,32767,0.4129216194152832
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,fp8,63,0.012671999633312225
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,bfloat16,127,0.011825600266456604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,bfloat16,255,0.01361439973115921
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,bfloat16,1,0.012775999307632447
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,fp8,127,0.012532800436019897
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,bfloat16,511,0.016808000206947327
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,fp8,511,0.018220800161361694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,fp8,1023,0.018518400192260743
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,bfloat16,1023,0.01741439998149872
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,bfloat16,2047,0.019011199474334717
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,fp8,2047,0.01874080002307892
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,bfloat16,4095,0.02244639992713928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,fp8,4095,0.022251200675964356
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,bfloat16,8191,0.03823840022087097
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,fp8,8191,0.02828640043735504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,bfloat16,16383,0.06061279773712158
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,fp8,16383,0.04352959990501404
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,bfloat16,127,0.012828800082206725
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,bfloat16,32767,0.10412479639053344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,bfloat16,1,0.026519998908042908
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,fp8,1,0.02874239981174469
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,fp8,32767,0.06894720196723939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,bfloat16,3,0.026475200057029726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,fp8,3,0.029702401161193846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,bfloat16,7,0.025784000754356384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,fp8,7,0.029300799965858458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,bfloat16,15,0.026235198974609374
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,fp8,15,0.029601600766181946
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,fp8,31,0.02933120131492615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,bfloat16,63,0.026183998584747313
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,fp8,63,0.029631999135017396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,bfloat16,127,0.02659519910812378
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,fp8,127,0.029364800453186034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,bfloat16,255,0.04419519901275635
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,fp8,255,0.03790079951286316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,bfloat16,511,0.06512159705162049
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,bfloat16,fp8,7,0.018638400733470915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,bfloat16,1023,0.11001280546188355
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,fp8,1023,0.08115839958190918
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,bfloat16,2047,0.19569599628448486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,fp8,2047,0.12916799783706664
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,bfloat16,4095,0.3658607959747314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,fp8,4095,0.22991681098937988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,bfloat16,8191,0.7054160118103028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,fp8,8191,0.43208961486816405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,bfloat16,1,0.043663999438285826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,fp8,1,0.049446401000022885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,bfloat16,bfloat16,32767,0.7078527927398681
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,fp8,3,0.049446401000022885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,bfloat16,3,0.04343999922275543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,bfloat16,7,0.0425135999917984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,fp8,7,0.049486398696899414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,bfloat16,fp8,255,0.014561599493026734
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,fp8,15,0.0495712012052536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,bfloat16,15,0.04239839911460876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,bfloat16,31,0.04306719899177551
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,fp8,16383,0.8208720207214355
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,fp8,31,0.04956960082054138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,bfloat16,63,0.04306559860706329
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,fp8,63,0.04928480088710785
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,bfloat16,16383,1.3874159812927247
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,bfloat16,255,0.07359520196914673
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,bfloat16,127,0.053755199909210204
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,fp8,255,0.06705440282821655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,fp8,511,0.09598079919815064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,bfloat16,511,0.13038079738616942
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,fp8,1023,0.14831680059432983
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,bfloat16,1023,0.2025696039199829
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,bfloat16,31,0.02587679922580719
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,bfloat16,1,0.07588639855384827
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,bfloat16,2047,0.3691728115081787
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,fp8,2047,0.24987680912017823
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,fp8,1,0.08863360285758973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,bfloat16,fp8,511,0.054927998781204225
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,bfloat16,7,0.07593119740486146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,bfloat16,3,0.07589120268821717
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,fp8,7,0.0902944028377533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,fp8,4095,0.4407072067260742
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,bfloat16,15,0.07591519951820373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,fp8,15,0.08876000046730041
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,bfloat16,31,0.07727839946746826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,bfloat16,4095,0.7091040134429931
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,bfloat16,63,0.08039360046386719
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,fp8,31,0.08863679766654968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,fp8,63,0.0902463972568512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,bfloat16,127,0.0866703987121582
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,fp8,255,0.12236000299453735
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,bfloat16,255,0.12754240036010742
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,bfloat16,511,0.2161423921585083
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,fp8,511,0.1839136004447937
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,fp8,1023,0.28154079914093016
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,bfloat16,1023,0.37626399993896487
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,bfloat16,1,0.01188800036907196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,fp8,1,0.012604799866676331
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,bfloat16,3,0.011897599697113037
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,fp8,3,0.01268640011548996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,bfloat16,7,0.011868800222873687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,fp8,2047,0.4727503776550293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,fp8,7,0.012582400441169738
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,bfloat16,15,0.011902400106191636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,bfloat16,31,0.011779200285673141
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,fp8,31,0.012643200159072877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,fp8,15,0.01279519945383072
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,bfloat16,63,0.011791999638080596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,fp8,63,0.012564800679683685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,fp8,127,0.012636800110340119
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,bfloat16,255,0.01353919953107834
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,bfloat16,127,0.012104000151157378
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,fp8,255,0.014547200500965118
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,fp8,511,0.018372799456119537
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,bfloat16,1023,0.018545599281787874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,fp8,1023,0.018489600718021394
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,bfloat16,2047,0.020395199954509734
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,fp8,2047,0.02213599979877472
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,bfloat16,fp8,127,0.05106239914894104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,bfloat16,4095,0.03867039978504181
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,fp8,4095,0.027979201078414916
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,fp8,8191,0.042531201243400575
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,bfloat16,8191,0.06066880226135254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,bfloat16,16383,0.10476800203323364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,fp8,16383,0.06817920207977295
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,fp8,32767,0.11797440052032471
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,bfloat16,1,0.13772159814834595
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,fp8,1,0.1611840009689331
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,bfloat16,3,0.13765599727630615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,fp8,3,0.16091200113296508
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,fp8,3,0.09226880073547364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,fp8,7,0.16109280586242675
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,bfloat16,15,0.1397503972053528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,fp8,15,0.16110719442367555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,bfloat16,31,0.1429759979248047
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,fp8,127,0.09865760207176208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,fp8,31,0.16165759563446044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,bfloat16,63,0.14555039405822753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,fp8,63,0.17065600156784058
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,fp8,127,0.17725280523300171
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,bfloat16,255,0.2217616081237793
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,fp8,255,0.23130080699920655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,bfloat16,511,0.40422878265380857
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,bfloat16,bfloat16,2047,0.7169072151184082
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,fp8,511,0.34488959312438966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,bfloat16,511,0.016432000696659087
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,bfloat16,1,0.26050240993499757
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,bfloat16,1023,0.7221343994140625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,bfloat16,3,0.25754399299621583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,fp8,1023,0.5469488143920899
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,fp8,3,0.30852479934692384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,fp8,7,0.304422402381897
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,bfloat16,bfloat16,32767,0.19046239852905272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,bfloat16,15,0.25931520462036134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,fp8,15,0.3049232006072998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,bfloat16,31,0.2638144016265869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,bfloat16,63,0.2666192054748535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,fp8,31,0.3185744047164917
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,bfloat16,7,0.13786400556564332
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,fp8,63,0.3246623992919922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,bfloat16,127,0.2762576103210449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,fp8,127,0.3298207998275757
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,bfloat16,255,0.42020320892333984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,fp8,255,0.4304384231567383
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,bfloat16,1,0.01226079985499382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,fp8,1,0.013228799402713775
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,bfloat16,bfloat16,127,0.14943039417266846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,fp8,3,0.012990400195121765
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,bfloat16,7,0.012303999811410903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,fp8,7,0.012908799946308136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,bfloat16,15,0.012017600238323212
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,fp8,15,0.01290079951286316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,bfloat16,31,0.012150400131940842
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,fp8,31,0.012982399761676788
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,fp8,511,0.6635424137115479
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,bfloat16,511,0.772766399383545
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,bfloat16,63,0.01218079999089241
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,fp8,63,0.013052800297737121
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,bfloat16,255,0.013993600010871887
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,fp8,255,0.015057599544525147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,fp8,127,0.013102400302886962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,bfloat16,511,0.01703999936580658
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,fp8,511,0.01855839937925339
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,bfloat16,1023,0.021081599593162536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,fp8,1023,0.022171199321746826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,bfloat16,2047,0.03848479986190796
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,fp8,2047,0.028391999006271363
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,bfloat16,4095,0.060452800989151
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,fp8,4095,0.042467200756073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,bfloat16,8191,0.10506880283355713
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,fp8,8191,0.0686464011669159
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,fp8,1,0.3038431882858276
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,bfloat16,16383,0.1901535987854004
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,fp8,16383,0.11865760087966919
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,1,0.01356000006198883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,3,0.013619199395179749
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,bfloat16,32767,0.3642560005187988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,fp8,32767,0.21652159690856934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,3,0.01433439999818802
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,bfloat16,bfloat16,7,0.2585551977157593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,7,0.013276800513267517
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,15,0.013660800457000733
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,31,0.013739199936389923
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,31,0.014326399564743042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,7,0.014207999408245086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,63,0.013545599579811097
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,63,0.014379200339317322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,255,0.015372799336910247
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,255,0.016257600486278535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,127,0.014270399510860444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,511,0.018409599363803864
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,511,0.01995680034160614
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,1023,0.03766719996929169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,1023,0.027689599990844728
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,2047,0.05934079885482788
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,4095,0.10289440155029297
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,4095,0.06686400175094605
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,bfloat16,3,0.01228640004992485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,8191,0.1897711992263794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,8191,0.11728160381317139
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,16383,0.3633343935012817
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,bfloat16,bfloat16,127,0.011963199824094772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,16383,0.21716959476470948
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,32767,0.4112207889556885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,1,0.014227199554443359
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,32767,0.7086512088775635
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,1,0.011572799831628799
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,3,0.011577600240707397
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,3,0.012324800342321396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,1,0.012204799801111221
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,7,0.011574400216341018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,15,0.014267200231552124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,15,0.011671999841928482
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,15,0.012076800316572189
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,31,0.01170400008559227
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,63,0.011383999884128571
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,63,0.01244800016283989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,127,0.011336000263690948
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,127,0.012457600235939026
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,31,0.012366399914026261
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,255,0.013075199723243714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,255,0.014364799857139588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,511,0.016150400042533875
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,511,0.018092800676822663
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,1023,0.016865600645542145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,1023,0.017982399463653563
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,65535,0.8041248321533203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,2047,0.01690399944782257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,2047,0.01818079948425293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,fp8,2047,0.042027199268341066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,4095,0.017123199999332428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,8191,0.018880000710487364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,8191,0.019939200580120088
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,16383,0.020292800664901734
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,32767,0.02441119998693466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,4095,0.017956799268722533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,32767,0.02401919960975647
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,65535,0.040575999021530154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,1,0.011631999909877778
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,65535,0.03245919942855835
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,1,0.012065599858760833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,3,0.011638399958610535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,3,0.012211199849843979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,7,0.011326400190591812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,7,0.012307199835777282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,15,0.011502400040626526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,15,0.012121599912643433
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,31,0.01154400035738945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,31,0.012273599952459335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,63,0.01154400035738945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,63,0.012276799976825714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,127,0.011684799939393998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,127,0.012228800356388092
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,255,0.013126400113105775
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,255,0.014407999813556671
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,511,0.01605439931154251
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,511,0.018087999522686006
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,1023,0.01703519970178604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,1023,0.01803999990224838
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,2047,0.017092800140380858
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,2047,0.018303999304771425
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,4095,0.01722400039434433
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,4095,0.01838400065898895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,8191,0.02085600048303604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,8191,0.02035360038280487
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,16383,0.024532799422740937
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,16383,0.024348799884319306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,32767,0.03970719873905182
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,32767,0.03054080009460449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,bfloat16,65535,0.06269919872283936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,1,0.012694400548934937
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,bfloat16,fp8,65535,0.04701600074768066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,1,0.013492800295352936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,3,0.013312000036239623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,7,0.012707200646400452
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,7,0.013475200533866883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,15,0.012734399735927581
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,15,0.013473600149154663
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,31,0.012647999823093415
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,31,0.013596799969673157
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,63,0.012729600071907043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,63,0.013491199910640716
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,127,0.013195200264453888
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,127,0.013500800728797913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,255,0.0147024005651474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,255,0.01525920033454895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,511,0.017664000391960144
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,511,0.018939200043678283
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,1023,0.022767999768257143
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,1023,0.02274879962205887
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,2047,0.03912639915943146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,2047,0.029451200366020204
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,127,0.013526399433612824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,4095,0.044047999382019046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,fp8,7,0.012177599966526032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,8191,0.07098879814147949
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,8191,0.10739840269088745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,bfloat16,bfloat16,65535,1.404304027557373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,16383,0.1938207983970642
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,16383,0.12294399738311768
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,bfloat16,bfloat16,16383,0.02046400010585785
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,32767,0.3685551881790161
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,32767,0.23668959140777587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,1,0.01162559986114502
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,3,0.012299200147390365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,1,0.012399999797344208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,3,0.01167680025100708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,7,0.011574400216341018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,7,0.012491200119256973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,15,0.012468799948692322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,15,0.011577600240707397
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,31,0.0117807999253273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,fp8,65535,0.4200079917907715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,65535,0.7135663986206054
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,31,0.012452799826860428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,63,0.012441600114107132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,255,0.013286399841308593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,255,0.014363199472427368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,127,0.012267199903726577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,511,0.016310399770736693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,511,0.01807200014591217
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,1023,0.018164800107479097
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,2047,0.017345599830150604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,1023,0.01681919991970062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,2047,0.018292799592018127
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,4095,0.01910399943590164
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,4095,0.018449600040912627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,8191,0.023820799589157105
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,8191,0.02427999973297119
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,16383,0.03979359865188599
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,16383,0.030139198899269103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,32767,0.06307520270347595
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,32767,0.04675039947032929
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,3,0.012697599828243256
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,65535,0.10703840255737304
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,fp8,65535,0.07404320240020752
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,bfloat16,1,0.018676799535751343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,fp8,1,0.020185600221157073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,bfloat16,3,0.018886399269104005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,fp8,3,0.020259200036525725
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,bfloat16,7,0.01881600022315979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,fp8,7,0.020449599623680113
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,bfloat16,15,0.018649600446224213
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,fp8,15,0.020241600275039674
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,bfloat16,31,0.018555200099945067
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,fp8,31,0.020334400236606598
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,bfloat16,63,0.01878879964351654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,bfloat16,127,0.018960000574588777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,fp8,127,0.02037599980831146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,bfloat16,255,0.02250239998102188
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,fp8,255,0.02428639978170395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,bfloat16,511,0.03960799872875213
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,fp8,511,0.03159840106964111
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,bfloat16,1023,0.0626479983329773
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,fp8,1023,0.04786399900913239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,bfloat16,bfloat16,4095,0.06143680214881897
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,fp8,2047,0.07220320105552673
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,bfloat16,2047,0.10636320114135742
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,bfloat16,4095,0.1924847960472107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,fp8,4095,0.12213759422302246
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,bfloat16,8191,0.3649791955947876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,fp8,8191,0.22263519763946532
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,bfloat16,16383,0.7068416118621826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,63,0.011499200016260147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,fp8,16383,0.4189807891845703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,bfloat16,bfloat16,127,0.011643200367689132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,bfloat16,1,0.028240001201629637
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,fp8,1,0.031123200058937074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,fp8,3,0.031272000074386595
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,bfloat16,3,0.028459200263023378
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,bfloat16,7,0.02808319926261902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,fp8,7,0.03127520084381104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,bfloat16,15,0.028438401222229005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,fp8,15,0.031079998612403868
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,fp8,31,0.03153280019760132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,bfloat16,63,0.0282368004322052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,fp8,63,0.03151200115680695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,bfloat16,127,0.02857759892940521
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,fp8,127,0.03156639933586121
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,bfloat16,255,0.046409600973129274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,fp8,255,0.039504000544548036
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,bfloat16,511,0.06826400160789489
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,fp8,511,0.05698879957199097
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,bfloat16,1023,0.11333760023117065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,bfloat16,32767,1.4024239540100099
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,fp8,1023,0.0831712007522583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,fp8,32767,0.8172464370727539
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,bfloat16,2047,0.19703840017318724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,fp8,2047,0.1301695942878723
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,bfloat16,fp8,63,0.020084799826145174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,bfloat16,1,0.048342400789260866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,fp8,1,0.054808002710342404
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,fp8,4095,0.23208000659942626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,bfloat16,3,0.047804799675941465
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,fp8,3,0.05490720272064209
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,bfloat16,7,0.048556798696517946
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,fp8,7,0.05360640287399292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,bfloat16,15,0.0488783985376358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,fp8,15,0.05453119874000549
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,bfloat16,31,0.04778240025043488
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,fp8,31,0.054897600412368776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,fp8,8191,0.43317761421203616
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,bfloat16,8191,0.7145264148712158
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,fp8,63,0.053763198852539065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,bfloat16,127,0.05608159899711609
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,fp8,255,0.07085599899291992
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,fp8,127,0.056683200597763064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,bfloat16,255,0.08302080035209655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,bfloat16,511,0.12144639492034912
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,fp8,511,0.10072640180587769
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,fp8,1023,0.15225280523300172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,bfloat16,1023,0.2082911968231201
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,bfloat16,2047,0.374017596244812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,fp8,2047,0.24490718841552733
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,1,0.01199679970741272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,3,0.012083200365304947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,3,0.012727999687194824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,7,0.012086399644613267
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,fp8,4095,0.4508768081665039
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,7,0.012880000472068786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,bfloat16,4095,0.723089599609375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,15,0.012884800136089326
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,15,0.01207199990749359
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,31,0.012886400520801543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,31,0.01188800036907196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,63,0.011872000247240066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,63,0.012707200646400452
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,bfloat16,31,0.028377598524093627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,127,0.012588800489902496
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,255,0.013761599361896516
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,127,0.012036799639463424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,511,0.016836799681186676
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,511,0.018300800025463103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,1023,0.017591999471187593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,1023,0.018622399866580965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,2047,0.018940800428390504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,2047,0.018772800266742707
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,4095,0.022324800491333008
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,4095,0.022572800517082214
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,8191,0.04041920006275177
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,8191,0.030393600463867188
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,16383,0.06204000115394592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,16383,0.04541119933128357
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,32767,0.07166399955749511
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,32767,0.10667200088500976
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,65535,0.1214959979057312
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,bfloat16,65535,0.19279520511627196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,bfloat16,bfloat16,4095,0.43007678985595704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,bfloat16,1,0.0828000009059906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,fp8,1,0.09317119717597962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,bfloat16,3,0.08271679878234864
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,bfloat16,bfloat16,63,0.04930559992790222
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,bfloat16,7,0.08297759890556336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,fp8,3,0.09456800222396851
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,bfloat16,15,0.08255360126495362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,bfloat16,31,0.08277599811553955
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,fp8,31,0.09472960233688354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,bfloat16,63,0.08700479865074158
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,fp8,63,0.09713119864463807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,bfloat16,127,0.0915727972984314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,fp8,127,0.1026144027709961
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,bfloat16,255,0.13492799997329713
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,fp8,255,0.12809280157089234
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,bfloat16,511,0.21577761173248292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,fp8,511,0.18925280570983888
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,1,0.012254399806261062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,fp8,1023,0.2894464015960693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,bfloat16,1,0.14847199916839598
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,bfloat16,2047,0.7165296077728271
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,fp8,1,0.17163039445877076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,bfloat16,fp8,255,0.014620800316333771
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,fp8,2047,0.473803186416626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,fp8,3,0.16946400403976442
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,bfloat16,3,0.14872959852218628
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,bfloat16,7,0.14840480089187622
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,fp8,7,0.16990560293197632
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,bfloat16,15,0.14691200256347656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,fp8,15,0.17219359874725343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,bfloat16,31,0.15095839500427247
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,bfloat16,63,0.15475039482116698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,fp8,63,0.18328640460968018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,bfloat16,127,0.16275520324707032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,fp8,127,0.18381279706954956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,bfloat16,255,0.23916480541229249
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,fp8,255,0.23754560947418213
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,bfloat16,511,0.40116162300109864
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,fp8,7,0.09324480295181274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,1,0.011959999799728394
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,1,0.01295199990272522
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,3,0.01188800036907196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,3,0.012747199833393097
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,bfloat16,1023,0.7278384208679199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,7,0.011963199824094772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,15,0.012012799829244613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,7,0.012886400520801543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,fp8,1023,0.5511600017547608
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,15,0.012873600423336028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,31,0.012055999785661697
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,63,0.01202400028705597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,31,0.012849600613117218
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,127,0.012011200189590454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,bfloat16,1023,0.38023359775543214
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,127,0.01268479973077774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,255,0.013655999302864074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,511,0.018248000741004945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,511,0.01659359931945801
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,255,0.014715200662612915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,1023,0.019023999571800232
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,1023,0.018566399812698364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,2047,0.021076799929142
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,2047,0.02236959934234619
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,4095,0.03858399987220764
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,8191,0.04538559913635254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,8191,0.0625823974609375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,16383,0.10704159736633301
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,16383,0.07085760235786438
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,32767,0.19336479902267456
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,32767,0.12161760330200196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,1,0.013483199477195739
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,bfloat16,65535,0.36772799491882324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,65535,0.22197599411010743
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,1,0.014265599846839904
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,7,0.01345600038766861
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,3,0.013331200182437896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,7,0.014094400405883788
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,15,0.013415999710559845
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,3,0.014161600172519684
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,15,0.01422239989042282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,31,0.013475200533866883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,31,0.014193600416183472
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,63,0.013577599823474885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,127,0.013291199505329133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,63,0.014299200475215912
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,255,0.015009599924087524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,255,0.015860800445079804
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,511,0.018089599907398224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,511,0.01967200040817261
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,1023,0.02249760031700134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,bfloat16,fp8,15,0.09458400011062622
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,1023,0.023550400137901308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,2047,0.040398401021957395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,2047,0.02993279993534088
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,4095,0.0619983971118927
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,4095,0.04480479955673218
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,8191,0.07160639762878418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,16383,0.19826719760894776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,63,0.013142399489879608
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,16383,0.12640960216522218
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,bfloat16,fp8,4095,0.02834399938583374
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,32767,0.37448959350585936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,32767,0.22546560764312745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,65535,0.42232961654663087
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,65535,0.7242735862731934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,fp8,31,0.16997120380401612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,1,0.011497599631547928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,3,0.011524800211191177
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,1,0.012151999771595002
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,7,0.011419200152158738
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,3,0.012246400117874146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,7,0.012076800316572189
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,15,0.011550399661064147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,15,0.01210239976644516
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,31,0.011699199676513672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,31,0.012171199917793274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,63,0.011596799641847611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,63,0.012193600088357926
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,127,0.011628799885511399
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,127,0.012132800370454788
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,131071,1.4194144248962401
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,255,0.013281600177288055
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,bfloat16,fp8,511,0.36080479621887207
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,511,0.018374399840831758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,131071,0.821497631072998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,511,0.01627040058374405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,1023,0.01660960018634796
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,1023,0.018052799999713896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,2047,0.017995199561119078
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,2047,0.01663679927587509
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,bfloat16,8191,0.10748319625854492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,8191,0.01977120041847229
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,16383,0.022404800355434417
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,16383,0.023263999819755556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,32767,0.024036799371242524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,32767,0.02374880015850067
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,65535,0.028174400329589844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,65535,0.02749919891357422
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,131071,0.04683839976787567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,131071,0.0354559987783432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,1,0.011561600118875503
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,1,0.01231200024485588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,3,0.011553599685430526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,3,0.012296000123023986
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,7,0.011611200124025344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,7,0.012281599640846252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,15,0.01162080019712448
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,15,0.012256000190973282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,31,0.011512000113725662
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,31,0.012256000190973282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,63,0.01148959994316101
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,63,0.012348800152540206
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,127,0.01156959980726242
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,127,0.012372799962759019
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,255,0.013240000605583191
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,255,0.014292800426483154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,511,0.016171200573444365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,511,0.018054400384426118
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,1023,0.016859200596809388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,1023,0.017774400115013123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,2047,0.016996799409389494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,2047,0.01786559969186783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,4095,0.017233599722385407
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,4095,0.01815039962530136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,8191,0.01926400065422058
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,8191,0.020110400021076204
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,16383,0.023902399837970732
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,32767,0.027544000744819643
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,32767,0.027718400955200194
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,16383,0.023647999763488768
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,65535,0.043670400977134705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,65535,0.033460798859596255
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,bfloat16,131071,0.06512640118598938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,1,0.012436799705028534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,bfloat16,fp8,131071,0.050440001487731936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,bfloat16,fp8,127,0.014321599900722504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,1,0.013681599497795105
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,3,0.012388800084590913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,3,0.013359999656677246
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,7,0.012467200309038163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,7,0.013446399569511413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,15,0.012382400035858155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,15,0.013299199938774108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,31,0.01252799928188324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,31,0.013425600528717042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,63,0.01239679977297783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,63,0.0134320005774498
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,127,0.012587200105190276
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,127,0.013400000333786011
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,255,0.01435520052909851
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,255,0.015272000432014465
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,511,0.017164799571037292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,511,0.01896799951791763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,1023,0.019582399725914003
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,255,0.014263999462127686
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,1023,0.019152000546455383
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,2047,0.021833600103855134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,2047,0.02290239930152893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,4095,0.029212799668312073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,4095,0.039499199390411376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,8191,0.06322559714317322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,8191,0.046096000075340274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,4095,0.016897599399089813
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,16383,0.10995039939880372
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,16383,0.07391520142555237
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,bfloat16,8191,0.018884800374507904
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,32767,0.19705120325088502
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,1,0.01170400008559227
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,65535,0.2250511884689331
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,65535,0.37283360958099365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,3,0.011577600240707397
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,7,0.01149279996752739
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,7,0.012163200229406358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,131071,0.42112159729003906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,3,0.012379200011491776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,15,0.011475200206041336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,31,0.011552000045776367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,31,0.012230399996042252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,63,0.011547199636697768
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,15,0.012240000069141388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,63,0.012214399874210358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,127,0.01141119971871376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,255,0.013107199966907502
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,127,0.012257599830627441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,255,0.014156800508499146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,511,0.016131199896335602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,1023,0.016809600591659545
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,1023,0.018222400546073915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,2047,0.016979199647903443
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,2047,0.01834239959716797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,4095,0.017212800681591034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,4095,0.018518400192260743
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,8191,0.020411199331283568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,8191,0.020497600734233856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,16383,0.027030399441719054
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,16383,0.02749119997024536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,32767,0.04362559914588928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,32767,0.03357119858264923
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,65535,0.06617439985275268
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,65535,0.05074399709701538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,bfloat16,131071,0.11034400463104248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,131071,0.07767360210418701
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,1,0.014440000057220459
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,1,0.015558399260044098
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,3,0.014417600631713868
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,3,0.015502400696277618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,7,0.014401599764823914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,7,0.01555200070142746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,15,0.014542399346828461
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,15,0.01579039990901947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,31,0.014591999351978302
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,31,0.015475200116634369
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,63,0.014705599844455719
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,63,0.015502400696277618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,127,0.01462399959564209
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,127,0.015675200521945952
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,255,0.01656160056591034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,255,0.01748960018157959
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,511,0.019486400485038757
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,bfloat16,fp8,4095,0.018094399571418764
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,511,0.021206399798393248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,1023,0.038324800133705136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,1023,0.028883200883865357
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,2047,0.06036800146102905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,fp8,32767,0.12515360116958618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,2047,0.042556801438331605
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,4095,0.0694320023059845
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,4095,0.10461119413375855
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,1,0.012452799826860428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,8191,0.11881760358810425
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,8191,0.19048479795455933
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,16383,0.2196880102157593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,bfloat16,bfloat16,131071,0.7169904232025146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,16383,0.36515839099884034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,bfloat16,1,0.020996800065040587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,fp8,1,0.02220959961414337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,32767,0.4151519775390625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,fp8,3,0.022275200486183165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,bfloat16,3,0.020745599269866945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,bfloat16,fp8,511,0.01799200028181076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,32767,0.7114960193634033
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,fp8,7,0.022494399547576906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,bfloat16,15,0.020980800688266753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,bfloat16,7,0.020960000157356263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,fp8,15,0.022622400522232057
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,fp8,31,0.022515200078487396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,bfloat16,63,0.020891200006008147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,fp8,63,0.022767999768257143
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,bfloat16,127,0.02083519995212555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,fp8,127,0.022563199698925018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,bfloat16,255,0.02535040080547333
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,fp8,255,0.026148799061775207
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,bfloat16,511,0.0430976003408432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,fp8,511,0.034001600742340085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,bfloat16,1023,0.06727039813995361
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,fp8,1023,0.049537599086761475
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,bfloat16,2047,0.10890400409698486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,fp8,2047,0.0752943992614746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,bfloat16,65535,1.4097552299499512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,bfloat16,fp8,65535,0.8150976181030274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,bfloat16,4095,0.19413280487060547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,fp8,4095,0.12394720315933228
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,fp8,1,0.036820799112319946
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,bfloat16,1,0.033980798721313474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,fp8,8191,0.22842400074005126
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,bfloat16,3,0.03380799889564514
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,bfloat16,8191,0.37033441066741946
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,fp8,7,0.03693119883537292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,bfloat16,7,0.034492799639701845
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,bfloat16,15,0.03404319882392883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,fp8,15,0.036924800276756285
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,bfloat16,31,0.03442080020904541
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,fp8,31,0.037088000774383546
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,bfloat16,63,0.03400320112705231
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,fp8,16383,0.4273359775543213
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,fp8,63,0.03755680024623871
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,bfloat16,127,0.03447040021419525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,fp8,127,0.03721440136432648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,fp8,255,0.04636000096797943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,bfloat16,255,0.05432800054550171
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,bfloat16,16383,0.7225584030151367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,bfloat16,511,0.07368800044059753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,fp8,511,0.06182079911231995
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,bfloat16,1023,0.11967359781265259
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,fp8,2047,0.13663519620895387
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,bfloat16,2047,0.20396800041198732
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,bfloat16,4095,0.3726896047592163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,fp8,4095,0.2390496015548706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,1,0.011587200313806533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,3,0.0117296002805233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,1,0.0126351997256279
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,3,0.012494400143623352
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,7,0.012489599734544754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,fp8,8191,0.43321919441223145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,15,0.01170239970088005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,bfloat16,8191,0.7207615852355957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,31,0.011707200109958649
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,15,0.012435200065374375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,63,0.01162080019712448
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,31,0.012438400089740754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,63,0.012280000001192093
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,127,0.011640000343322753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,255,0.013182400166988373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,127,0.012484800070524216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,bfloat16,bfloat16,31,0.02085919976234436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,511,0.01618559956550598
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,511,0.01831679940223694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,1023,0.017067199945449828
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,2047,0.017239999771118165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,2047,0.018492799997329713
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,4095,0.01879200041294098
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,4095,0.018807999789714813
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,8191,0.024062399566173554
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,8191,0.02422720044851303
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,16383,0.043249601125717164
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,16383,0.03369120061397553
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,32767,0.06626240015029908
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,32767,0.05050879716873169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,65535,0.07790079712867737
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,fp8,3,0.036883199214935304
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,131071,0.20144639015197754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,131071,0.12916799783706664
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,bfloat16,1,0.054983997344970705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,fp8,1,0.0601311981678009
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,bfloat16,3,0.054902398586273195
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,fp8,3,0.060027199983596805
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,bfloat16,7,0.05502880215644836
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,fp8,7,0.0601311981678009
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,bfloat16,15,0.05520960092544556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,fp8,15,0.060399997234344485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,bfloat16,fp8,1023,0.08942080140113831
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,bfloat16,31,0.05527359843254089
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,fp8,31,0.060313600301742556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,bfloat16,63,0.055953598022460936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,fp8,63,0.06109439730644226
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,bfloat16,127,0.0613424003124237
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,fp8,127,0.06258879899978638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,bfloat16,255,0.08434240221977234
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,fp8,255,0.07789440155029297
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,bfloat16,511,0.12486399412155151
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,fp8,511,0.1110368013381958
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,7,0.011868800222873687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,bfloat16,1023,0.2116368055343628
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,fp8,1023,0.16119840145111083
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,bfloat16,1,0.09386240243911743
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,fp8,2047,0.2569216012954712
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,bfloat16,2047,0.3781984090805054
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,255,0.014283199608325959
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,fp8,1,0.10642240047454835
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,bfloat16,3,0.0945792019367218
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,fp8,1023,0.01804800033569336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,bfloat16,7,0.09453759789466858
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,fp8,4095,0.45098400115966797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,fp8,7,0.10662239789962769
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,bfloat16,bfloat16,4095,0.7202320098876953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,bfloat16,15,0.09451680183410645
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,fp8,15,0.10589920282363892
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,fp8,31,0.10694559812545776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,bfloat16,31,0.09374719858169556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,fp8,63,0.10942239761352539
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,bfloat16,127,0.10455679893493652
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,fp8,127,0.11440800428390503
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,bfloat16,255,0.14070240259170533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,fp8,255,0.14236639738082885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,bfloat16,511,0.21549758911132813
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,bfloat16,bfloat16,65535,0.11132960319519043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,fp8,511,0.204748797416687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,1,0.012011200189590454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,1,0.012703999876976013
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,bfloat16,1023,0.38590240478515625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,3,0.011879999935626984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,fp8,1023,0.30692479610443113
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,7,0.011838400363922119
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,3,0.012464000284671784
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,15,0.011878400295972823
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,7,0.012830400466918945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,15,0.012569600343704223
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,31,0.011897599697113037
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,31,0.012628799676895142
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,63,0.011795199662446975
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,63,0.012753599882125854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,127,0.012049599736928939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,127,0.012647999823093415
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,255,0.013799999654293061
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,255,0.014758400619029999
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,bfloat16,2047,0.7245327949523925
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,511,0.016791999340057373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,511,0.018372799456119537
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,1023,0.017612800002098083
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,2047,0.019204799830913544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,2047,0.018857599794864656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,4095,0.022787199914455415
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,1023,0.018356800079345703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,4095,0.022579200565814972
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,8191,0.04092639982700348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,8191,0.03060159981250763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,16383,0.048451200127601624
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,16383,0.06547359824180603
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,32767,0.07486559748649597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,32767,0.10961760282516479
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,65535,0.19673919677734375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,bfloat16,1,0.013660800457000733
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,131071,0.22632160186767578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,bfloat16,131071,0.37452480792999265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,bfloat16,3,0.01441120058298111
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,fp8,3,0.014824000000953675
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,fp8,7,0.0147599995136261
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,bfloat16,15,0.014035199582576752
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,fp8,3,0.10536479949951172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,fp8,15,0.015041600167751312
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,bfloat16,31,0.013655999302864074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,fp8,31,0.014684799313545226
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,bfloat16,63,0.01385280042886734
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,fp8,63,0.015057599544525147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,bfloat16,127,0.01446239948272705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,fp8,127,0.014856000244617463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,bfloat16,255,0.015855999290943147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,bfloat16,63,0.10320800542831421
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,fp8,255,0.017030400037765504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,bfloat16,511,0.018801599740982056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,fp8,511,0.020310400426387785
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,bfloat16,1023,0.0335343986749649
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,fp8,1023,0.02658880054950714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,bfloat16,2047,0.04664640128612518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,bfloat16,1,0.013264000415802002
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,fp8,2047,0.036831998825073244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,bfloat16,3,0.01332319974899292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,fp8,1,0.014448000490665436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,fp8,3,0.014585599303245544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,bfloat16,7,0.013276800513267517
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,fp8,7,0.014619199931621552
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,bfloat16,15,0.013822400569915771
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,fp8,15,0.014595200121402741
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,bfloat16,31,0.013825599849224091
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,fp8,31,0.014601600170135499
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,bfloat16,63,0.01371839940547943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,fp8,63,0.014630399644374847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,bfloat16,127,0.014300799369812012
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,fp8,127,0.014510400593280792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,bfloat16,255,0.016120000183582305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,fp8,255,0.016569599509239197
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,bfloat16,511,0.019569599628448488
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,fp8,511,0.020187200605869295
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,bfloat16,1023,0.04020000100135803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,fp8,1023,0.029172798991203307
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,bfloat16,2047,0.06209440231323242
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,bfloat16,fp8,2047,0.4874608039855957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,bfloat16,fp8,2047,0.04371039867401123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,bfloat16,1,0.017220799624919892
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,bfloat16,3,0.017689600586891174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,fp8,3,0.018878400325775146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,bfloat16,7,0.01756799966096878
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,fp8,1,0.018721599876880646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,fp8,7,0.018918399512767792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,bfloat16,15,0.017579199373722078
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,fp8,15,0.01870719939470291
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,fp8,31,0.019020800292491914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,bfloat16,63,0.017243200540542604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,fp8,63,0.01929280012845993
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,bfloat16,127,0.017423999309539796
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,fp8,127,0.019233599305152893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,bfloat16,255,0.021825599670410156
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,fp8,255,0.023126399517059325
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,bfloat16,fp8,65535,0.12608319520950317
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,bfloat16,511,0.04008159935474396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,fp8,511,0.03150720000267029
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,bfloat16,1023,0.06526079773902893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,fp8,1023,0.04785119891166687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,fp8,1,0.014659200608730317
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,fp8,2047,0.07237120270729065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,bfloat16,2047,0.10828959941864014
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,bfloat16,1,0.024691200256347655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,fp8,1,0.027619200944900512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,bfloat16,3,0.024695999920368195
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,bfloat16,bfloat16,7,0.014056000113487243
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,fp8,7,0.027723199129104613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,fp8,15,0.02773439884185791
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,bfloat16,31,0.02542079985141754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,fp8,31,0.02778880000114441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,bfloat16,63,0.024982400238513947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,fp8,63,0.028142398595809935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,bfloat16,127,0.027054399251937866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,fp8,127,0.027875199913978577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,bfloat16,255,0.044705599546432495
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,fp8,255,0.037134400010108946
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,bfloat16,511,0.06603519916534424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,fp8,511,0.054199999570846556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,bfloat16,1023,0.11208640336990357
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,fp8,1023,0.08094720244407654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,bfloat16,1,0.011840000003576278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,bfloat16,2047,0.19629600048065185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,fp8,1,0.01252640038728714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,fp8,2047,0.1282863974571228
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,fp8,3,0.0125231996178627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,bfloat16,3,0.011912000179290772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,bfloat16,7,0.011791999638080596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,fp8,7,0.012515200674533844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,fp8,15,0.012507200241088867
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,bfloat16,31,0.011827199906110763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,fp8,31,0.01252480000257492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,bfloat16,63,0.011905600130558015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,fp8,63,0.012484800070524216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,bfloat16,127,0.012011200189590454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,fp8,127,0.01252480000257492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,bfloat16,255,0.013606399297714233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,fp8,255,0.014580799639225006
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,bfloat16,511,0.01658560037612915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,fp8,511,0.018561600148677825
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,bfloat16,1023,0.01687840074300766
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,fp8,1023,0.017847999930381775
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,bfloat16,2047,0.017158399522304534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,fp8,2047,0.01804320067167282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,bfloat16,1,0.011531200259923935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,fp8,1,0.012270399928092956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,bfloat16,3,0.011550399661064147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,fp8,3,0.012291199713945388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,bfloat16,7,0.011503999680280685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,fp8,7,0.012342400103807449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,bfloat16,15,0.011622399836778641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,fp8,15,0.012481600046157837
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,bfloat16,31,0.011526399850845337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,fp8,31,0.012468799948692322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,bfloat16,63,0.011660800129175187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,fp8,63,0.012319999933242797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,bfloat16,bfloat16,31,0.017403200268745422
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,bfloat16,127,0.011692799627780914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,fp8,127,0.01233920007944107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,fp8,255,0.014507199823856353
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,bfloat16,511,0.016196799278259278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,fp8,511,0.01847199946641922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,bfloat16,1023,0.016123199462890626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,fp8,1023,0.017420800030231477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,bfloat16,2047,0.016547200083732606
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,fp8,2047,0.017652800679206847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,bfloat16,1,0.011596799641847611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,fp8,1,0.012291199713945388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,bfloat16,3,0.01167680025100708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,fp8,3,0.012291199713945388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,bfloat16,7,0.011627200245857238
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,fp8,7,0.012307199835777282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,fp8,15,0.012299200147390365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,bfloat16,31,0.011700800061225891
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,fp8,3,0.027856001257896425
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,bfloat16,7,0.024916799366474153
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,fp8,31,0.012409599870443344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,bfloat16,63,0.011591999977827071
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,fp8,63,0.012332800030708312
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,bfloat16,127,0.011587200313806533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,fp8,127,0.012363199889659882
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,bfloat16,255,0.013259199261665345
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,fp8,255,0.014313599467277527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,bfloat16,511,0.016246399283409117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,fp8,511,0.018116800487041472
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,bfloat16,1023,0.015940800309181213
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,fp8,1023,0.017047999799251555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,bfloat16,2047,0.016420799493789672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,fp8,2047,0.017367999255657195
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,bfloat16,1,0.011587200313806533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,fp8,1,0.012358400225639343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,bfloat16,3,0.01159520000219345
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,fp8,3,0.012316799908876418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,bfloat16,7,0.011596799641847611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,fp8,7,0.012254399806261062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,bfloat16,15,0.011585599929094314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,fp8,15,0.012241599708795547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,bfloat16,31,0.011532799899578094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,fp8,31,0.012414400279521943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,bfloat16,63,0.011590400338172912
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,bfloat16,bfloat16,15,0.011816000193357467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,fp8,63,0.012415999919176102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,bfloat16,127,0.011635199934244157
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,fp8,127,0.012294399738311767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,fp8,255,0.014305600523948669
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,bfloat16,511,0.016190400719642638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,fp8,511,0.017998400330543517
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,bfloat16,1023,0.0160863995552063
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,fp8,1023,0.01716320067644119
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,bfloat16,2047,0.016577599942684172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,fp8,2047,0.017502400279045104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,bfloat16,1,0.011990399658679962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,fp8,1,0.012665599584579468
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,bfloat16,3,0.011908800154924393
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,fp8,3,0.01271200031042099
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,bfloat16,7,0.011670400202274323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,fp8,7,0.012636800110340119
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,fp8,15,0.012495999783277511
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,bfloat16,31,0.011913599818944931
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,fp8,31,0.01252640038728714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,bfloat16,63,0.012006399780511856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,fp8,63,0.012542399764060973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,bfloat16,127,0.012008000165224075
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,fp8,127,0.012479999661445617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,bfloat16,255,0.013620799779891968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,fp8,255,0.014486399292945863
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,bfloat16,511,0.01658879965543747
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,fp8,511,0.01849599927663803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,bfloat16,1023,0.01690559983253479
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,fp8,1023,0.017924800515174866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,bfloat16,2047,0.017246399819850922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,fp8,2047,0.018199999630451203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,bfloat16,bfloat16,255,0.013374400138854981
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,fp8,1,0.01239520013332367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,bfloat16,3,0.011646399646997452
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,fp8,3,0.01228479966521263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,bfloat16,7,0.011825600266456604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,fp8,7,0.012268800288438797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,bfloat16,15,0.011736000329256058
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,fp8,15,0.012316799908876418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,bfloat16,31,0.011774399876594543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,fp8,31,0.012326399981975555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,bfloat16,63,0.011736000329256058
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,fp8,63,0.012321600317955017
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,bfloat16,127,0.011655999720096588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,bfloat16,bfloat16,15,0.011683200299739838
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,fp8,127,0.012345600128173827
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,bfloat16,255,0.013342399895191193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,fp8,255,0.014347200095653535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,bfloat16,511,0.01629599928855896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,fp8,511,0.018177600204944612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,bfloat16,1023,0.016212800145149232
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,fp8,1023,0.017350399494171144
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,bfloat16,2047,0.016577599942684172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,fp8,2047,0.017654399573802947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,fp8,1,0.012415999919176102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,bfloat16,3,0.011529599875211715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,fp8,3,0.01244800016283989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,bfloat16,7,0.011529599875211715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,fp8,7,0.012443199753761292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,bfloat16,15,0.01154239997267723
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,fp8,15,0.012531200051307678
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,bfloat16,31,0.011606399714946748
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,fp8,31,0.012488000094890594
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,bfloat16,63,0.01167839989066124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,fp8,63,0.012491200119256973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,bfloat16,127,0.01167680025100708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,fp8,127,0.012383999675512314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,bfloat16,255,0.013264000415802002
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,fp8,255,0.014342400431632995
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,bfloat16,511,0.01627359986305237
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,fp8,511,0.01803999990224838
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,bfloat16,1023,0.015988799929618835
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,fp8,1023,0.017177599668502807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,bfloat16,2047,0.016476799547672272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,fp8,2047,0.017518399655818938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,bfloat16,1,0.01175680011510849
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,fp8,1,0.012508800625801087
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,bfloat16,3,0.011816000193357467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,fp8,3,0.01252480000257492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,bfloat16,7,0.011803200095891952
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,fp8,7,0.012532800436019897
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,bfloat16,15,0.011776000261306763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,fp8,15,0.012470400333404541
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,bfloat16,bfloat16,15,0.012027200311422348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,bfloat16,31,0.011684799939393998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,fp8,31,0.01252640038728714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,bfloat16,63,0.011737599968910217
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,fp8,63,0.012441600114107132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,bfloat16,127,0.011713600158691407
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,fp8,127,0.01268800050020218
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,bfloat16,255,0.01374559998512268
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,fp8,255,0.014363199472427368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,bfloat16,511,0.016689600050449373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,fp8,511,0.01801760047674179
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,bfloat16,1023,0.016524800658226015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,fp8,1023,0.017294399440288544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,bfloat16,2047,0.018216000497341157
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,bfloat16,fp8,2047,0.01775519996881485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,bfloat16,1,0.013023999333381654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,bfloat16,bfloat16,1,0.01168000027537346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,fp8,1,0.01406240016222
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,bfloat16,3,0.01340160071849823
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,fp8,3,0.013865600526332855
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,bfloat16,7,0.013344000279903411
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,fp8,7,0.01406240016222
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,bfloat16,15,0.013422399759292603
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,fp8,15,0.01401119977235794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,bfloat16,31,0.013273599743843078
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,fp8,31,0.014022399485111237
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,bfloat16,63,0.013492800295352936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,fp8,63,0.014107200503349304
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,bfloat16,127,0.013808000087738036
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,fp8,127,0.013984000682830811
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,bfloat16,255,0.01515360027551651
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,fp8,255,0.015971200168132783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,bfloat16,511,0.017735999822616578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,fp8,511,0.01969279944896698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,bfloat16,1023,0.021430400013923646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,fp8,1023,0.021297599375247955
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,bfloat16,2047,0.02982560098171234
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,bfloat16,1,0.01281919926404953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,bfloat16,fp8,2047,0.025260800123214723
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,fp8,1,0.013792000710964203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,bfloat16,bfloat16,15,0.024916799366474153
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,bfloat16,3,0.012852799892425538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,fp8,3,0.013808000087738036
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,bfloat16,15,0.01284160017967224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,fp8,15,0.013644799590110779
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,bfloat16,31,0.01281919926404953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,fp8,31,0.013649600744247436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,bfloat16,63,0.012940800189971924
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,bfloat16,127,0.01324319988489151
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,fp8,63,0.013628800213336945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,fp8,127,0.013630400598049163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,bfloat16,255,0.014841599762439728
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,fp8,255,0.015502400696277618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,bfloat16,511,0.017932799458503724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,fp8,511,0.01900320053100586
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,bfloat16,1023,0.023230400681495667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,fp8,1023,0.02296479940414429
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,fp8,2047,0.030763199925422667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,bfloat16,2047,0.041631999611854556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,bfloat16,1,0.012915199995040894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,fp8,1,0.013489599525928497
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,fp8,3,0.013713599741458892
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,bfloat16,3,0.012630400061607362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,bfloat16,7,0.012848000228404998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,fp8,7,0.013739199936389923
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,bfloat16,15,0.012598399817943574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,bfloat16,bfloat16,255,0.01326880007982254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,fp8,15,0.013689599931240082
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,bfloat16,31,0.013033600151538849
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,fp8,31,0.013391999900341034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,fp8,63,0.013551999628543854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,bfloat16,63,0.012999999523162841
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,bfloat16,127,0.013199999928474426
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,fp8,127,0.013406400382518769
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,bfloat16,255,0.014883199334144592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,fp8,255,0.015836800634860992
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,bfloat16,511,0.018697600066661834
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,fp8,511,0.019017599523067474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,bfloat16,1023,0.03738720118999481
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,bfloat16,2047,0.060203200578689574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,fp8,2047,0.041547200083732604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,bfloat16,1,0.016627199947834015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,fp8,1,0.017931200563907623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,fp8,3,0.017867200076580048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,fp8,7,0.017876799404621124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,bfloat16,15,0.016521599888801575
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,fp8,15,0.017871999740600587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,bfloat16,31,0.016739200055599212
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,fp8,31,0.018033599853515624
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,bfloat16,63,0.01677280068397522
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,fp8,63,0.018055999279022218
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,bfloat16,127,0.017267200350761413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,fp8,127,0.018004800379276275
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,bfloat16,7,0.013052800297737121
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,bfloat16,fp8,7,0.013555200397968292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,bfloat16,255,0.02128159999847412
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,fp8,255,0.022158400714397432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,bfloat16,511,0.037915199995040894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,fp8,511,0.029497599601745604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,bfloat16,1023,0.06195840239524841
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,fp8,1023,0.04627839922904968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,bfloat16,1,0.011801599711179733
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,bfloat16,2047,0.10628479719161987
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,fp8,2047,0.07076320052146912
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,fp8,1,0.012503999471664428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,bfloat16,3,0.011851199716329575
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,bfloat16,7,0.011827199906110763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,fp8,7,0.012540799379348756
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,bfloat16,15,0.011876799911260606
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,fp8,15,0.012697599828243256
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,bfloat16,31,0.011798399686813354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,fp8,31,0.012718400359153748
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,bfloat16,63,0.011926399916410447
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,fp8,63,0.012705600261688233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,bfloat16,127,0.011923199892044068
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,fp8,127,0.012676799297332763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,bfloat16,255,0.013425600528717042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,fp8,255,0.014681600034236908
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,bfloat16,511,0.01637759953737259
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,fp8,511,0.01852159947156906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,bfloat16,1023,0.01687999963760376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,bfloat16,fp8,1023,0.028278398513793945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,fp8,1023,0.01815200001001358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,fp8,2047,0.018355199694633485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,bfloat16,3,0.016515199840068818
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,fp8,1,0.012383999675512314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,bfloat16,bfloat16,7,0.016667200624942778
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,fp8,3,0.012382400035858155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,fp8,7,0.012345600128173827
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,bfloat16,15,0.011692799627780914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,fp8,15,0.012460800260305405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,bfloat16,31,0.01178240031003952
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,fp8,31,0.012438400089740754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,bfloat16,63,0.01178399994969368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,fp8,63,0.01242400035262108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,bfloat16,127,0.011689600348472596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,fp8,127,0.012404800206422806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,bfloat16,255,0.01326880007982254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,fp8,255,0.014295999705791474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,bfloat16,511,0.016254399716854096
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,fp8,511,0.018083199858665466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,bfloat16,1023,0.016457599401474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,bfloat16,bfloat16,1,0.0117296002805233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,fp8,1023,0.017523199319839478
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,bfloat16,2047,0.01722400039434433
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,bfloat16,1,0.01162400022149086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,fp8,1,0.01239520013332367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,bfloat16,3,0.01165120005607605
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,fp8,3,0.012291199713945388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,bfloat16,7,0.011673600226640702
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,fp8,7,0.012449599802494049
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,bfloat16,15,0.011832000315189361
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,fp8,3,0.012614400684833526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,fp8,15,0.012483199685811996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,bfloat16,31,0.011879999935626984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,bfloat16,63,0.011894399672746659
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,fp8,63,0.012548799812793731
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,bfloat16,127,0.011840000003576278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,fp8,127,0.012425599992275238
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,bfloat16,255,0.013446399569511413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,fp8,255,0.014451199769973755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,bfloat16,bfloat16,2047,0.017188799381256104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,bfloat16,511,0.016359999775886536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,bfloat16,1,0.011844799667596818
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,fp8,511,0.018110400438308714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,bfloat16,1023,0.01653279960155487
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,fp8,1023,0.01732639968395233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,bfloat16,2047,0.018118399381637573
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,fp8,2047,0.017876799404621124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,bfloat16,3,0.011660800129175187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,bfloat16,1,0.012028799951076507
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,fp8,1,0.012758399546146392
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,bfloat16,3,0.011966399848461151
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,bfloat16,7,0.011710400134325028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,bfloat16,7,0.011937599629163742
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,fp8,7,0.012665599584579468
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,bfloat16,15,0.012011200189590454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,fp8,15,0.012734399735927581
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,fp8,31,0.012689599394798278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,bfloat16,63,0.011896000057458878
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,fp8,63,0.012948800623416901
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,bfloat16,127,0.012038400024175644
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,fp8,127,0.012654399871826172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,bfloat16,255,0.013811199367046357
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,fp8,255,0.014585599303245544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,bfloat16,511,0.0169855996966362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,fp8,511,0.018313600122928618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,bfloat16,1023,0.017817600071430205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,fp8,1023,0.017560000717639922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,bfloat16,2047,0.019860799610614776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,fp8,2047,0.021110400557518005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,bfloat16,1,0.01526080071926117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,fp8,1,0.016017599403858183
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,bfloat16,3,0.015646399557590486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,fp8,3,0.01597920060157776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,bfloat16,7,0.01565759927034378
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,bfloat16,15,0.015270400047302245
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,fp8,15,0.016327999532222748
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,bfloat16,31,0.015265600383281707
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,fp8,31,0.016307200491428375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,bfloat16,63,0.015624000132083893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,fp8,63,0.016575999557971954
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,bfloat16,127,0.01592639982700348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,fp8,127,0.016476799547672272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,bfloat16,255,0.017591999471187593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,fp8,255,0.018193599581718446
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,bfloat16,511,0.023073600232601167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,fp8,511,0.022407999634742735
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,bfloat16,fp8,2047,0.017769600450992584
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,bfloat16,1023,0.04340640008449555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,bfloat16,1,0.018617600202560425
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,fp8,1,0.02025440037250519
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,bfloat16,3,0.018510399758815764
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,fp8,3,0.020183999836444855
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,bfloat16,7,0.018454399704933167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,fp8,7,0.020268799364566804
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,bfloat16,15,0.018488000333309173
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,fp8,15,0.020190399885177613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,bfloat16,31,0.018596799671649934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,fp8,31,0.02030559927225113
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,bfloat16,63,0.01849119961261749
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,fp8,63,0.020343999564647674
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,bfloat16,127,0.019270400702953338
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,bfloat16,fp8,31,0.012491200119256973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,fp8,127,0.020337599515914916
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,bfloat16,255,0.028937599062919615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,fp8,255,0.02446240037679672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,bfloat16,511,0.04279040098190308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,fp8,511,0.033020800352096556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,fp8,1023,0.051123201847076416
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,bfloat16,bfloat16,1023,0.06896640062332153
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,fp8,1,0.029182401299476624
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,fp8,3,0.029207998514175416
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,bfloat16,7,0.02603360116481781
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,fp8,7,0.02922559976577759
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,bfloat16,15,0.02616479992866516
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,fp8,3,0.012691199779510498
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,fp8,15,0.029177600145339967
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,bfloat16,bfloat16,31,0.011931200325489045
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,bfloat16,31,0.026417601108551025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,fp8,31,0.029025599360466003
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,bfloat16,63,0.026086398959159852
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,bfloat16,127,0.031990399956703185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,fp8,127,0.029185599088668822
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,bfloat16,255,0.04741599857807159
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,fp8,255,0.039417600631713866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,bfloat16,511,0.06874399781227111
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,fp8,511,0.055579197406768796
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,fp8,7,0.015985600650310516
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,bfloat16,1023,0.1154703974723816
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,fp8,1023,0.08299999833106994
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,bfloat16,1,0.04036319851875305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,fp8,1,0.04681439995765686
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,bfloat16,3,0.04098399877548218
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,fp8,3,0.0465472012758255
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,bfloat16,7,0.040545600652694705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,fp8,7,0.046881601214408875
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,bfloat16,15,0.04033919870853424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,fp8,15,0.04693120121955872
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,bfloat16,31,0.04066239893436432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,fp8,31,0.046647998690605166
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,bfloat16,63,0.045737600326538085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,fp8,63,0.04689759910106659
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,bfloat16,127,0.05363839864730835
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,fp8,127,0.051678401231765744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,bfloat16,fp8,1023,0.03256320059299469
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,bfloat16,255,0.07623680233955384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,fp8,255,0.06781280040740967
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,bfloat16,511,0.11873600482940674
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,fp8,511,0.09640480279922485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,bfloat16,1,0.021507200598716737
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,bfloat16,1023,0.20754239559173585
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,bfloat16,3,0.02157599925994873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,fp8,3,0.02314240038394928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,bfloat16,7,0.021534399688243867
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,fp8,7,0.023156799376010895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,bfloat16,15,0.021524800360202788
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,fp8,15,0.023280000686645506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,bfloat16,31,0.02182080000638962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,fp8,31,0.023318399488925935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,bfloat16,63,0.02208160012960434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,fp8,63,0.023363199830055238
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,bfloat16,1,0.02614719867706299
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,fp8,127,0.02359839975833893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,bfloat16,255,0.0365231990814209
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,bfloat16,3,0.02606559991836548
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,fp8,255,0.02747359871864319
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,bfloat16,1,0.028401601314544677
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,fp8,1,0.03210879862308502
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,bfloat16,3,0.028486400842666626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,fp8,3,0.031632000207901
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,bfloat16,7,0.028567999601364136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,fp8,7,0.03176159858703613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,bfloat16,15,0.02852959930896759
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,fp8,15,0.03221119940280914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,bfloat16,31,0.02866879999637604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,fp8,31,0.03223679959774017
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,bfloat16,63,0.030275198817253112
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,fp8,63,0.032179200649261476
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,bfloat16,127,0.03937279880046844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,fp8,127,0.03282240033149719
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,bfloat16,fp8,63,0.029577600955963134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,fp8,255,0.04467839896678925
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,bfloat16,bfloat16,255,0.051875197887420656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,bfloat16,1,0.043382400274276735
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,fp8,1,0.04920479953289032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,bfloat16,3,0.04315840005874634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,fp8,3,0.049527999758720395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,bfloat16,7,0.04290879964828491
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,fp8,7,0.049414399266242984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,bfloat16,15,0.04338400065898895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,fp8,15,0.04921759963035584
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,fp8,31,0.04991840124130249
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,bfloat16,63,0.050318402051925656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,fp8,63,0.05002880096435547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,bfloat16,127,0.054992002248764035
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,fp8,127,0.057599997520446776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,bfloat16,255,0.0774399995803833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,fp8,255,0.0704912006855011
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,bfloat16,1,0.07151520252227783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,fp8,1,0.08455039858818054
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,bfloat16,3,0.07202720046043395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,fp8,3,0.08462560176849365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,bfloat16,7,0.07131519913673401
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,fp8,7,0.08508319854736328
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,bfloat16,15,0.0723904013633728
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,fp8,15,0.08463039994239807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,bfloat16,31,0.07586399912834167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,fp8,31,0.0855247974395752
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,bfloat16,63,0.08071680068969726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,fp8,63,0.09182400107383729
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,bfloat16,127,0.09080960154533387
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,fp8,127,0.1004080057144165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,bfloat16,fp8,1023,0.14667680263519287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,bfloat16,255,0.13057440519332886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,bfloat16,bfloat16,1,0.03643839955329895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,bfloat16,fp8,255,0.12269120216369629
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,bfloat16,bfloat16,3,0.03657439947128296
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,bfloat16,fp8,3,0.03893919885158539
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,bfloat16,bfloat16,7,0.03728959858417511
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,bfloat16,fp8,7,0.038863998651504514
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,bfloat16,bfloat16,15,0.0369264006614685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,bfloat16,fp8,15,0.03922719955444336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,bfloat16,bfloat16,31,0.038755199313163756
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,bfloat16,fp8,31,0.03940320014953613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,bfloat16,bfloat16,63,0.0440416008234024
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,bfloat16,127,0.023196800053119658
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,bfloat16,fp8,63,0.04084480106830597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,bfloat16,bfloat16,127,0.047152000665664676
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,bfloat16,fp8,127,0.04362399876117706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,bfloat16,fp8,1,0.05581279993057251
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,bfloat16,bfloat16,3,0.051025599241256714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,bfloat16,fp8,3,0.05602719783782959
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,bfloat16,bfloat16,7,0.05166239738464355
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,bfloat16,fp8,7,0.05583040118217468
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,bfloat16,bfloat16,15,0.051472002267837526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,bfloat16,fp8,15,0.05703999996185303
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,bfloat16,bfloat16,31,0.05410879850387573
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,bfloat16,fp8,31,0.056745600700378415
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,bfloat16,bfloat16,63,0.05901920199394226
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,bfloat16,fp8,63,0.060920000076293945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,bfloat16,bfloat16,127,0.0665776014328003
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,bfloat16,fp8,127,0.06417440176010132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,bfloat16,bfloat16,1,0.08015519976615906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,bfloat16,fp8,1,0.09074879884719848
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,bfloat16,bfloat16,3,0.08059359788894653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,bfloat16,fp8,3,0.08978400230407715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,bfloat16,bfloat16,7,0.0815168023109436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,bfloat16,fp8,7,0.09104160070419312
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,bfloat16,bfloat16,15,0.08299199938774109
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,bfloat16,fp8,15,0.09151520133018494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,bfloat16,bfloat16,31,0.08666399717330933
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,bfloat16,fp8,31,0.093094402551651
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,bfloat16,bfloat16,63,0.08915680050849914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,bfloat16,fp8,63,0.09942079782485962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,bfloat16,bfloat16,31,0.04346239864826203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,bfloat16,bfloat16,127,0.0965503990650177
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,bfloat16,fp8,127,0.10370559692382812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,bfloat16,bfloat16,1,0.13939039707183837
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,bfloat16,fp8,1,0.1569424033164978
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,bfloat16,bfloat16,3,0.1391167998313904
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,bfloat16,fp8,3,0.15821280479431152
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,bfloat16,bfloat16,7,0.14056799411773682
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,bfloat16,fp8,7,0.15674400329589844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,bfloat16,fp8,15,0.15998239517211915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,bfloat16,bfloat16,31,0.1454975962638855
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,bfloat16,fp8,31,0.16862080097198487
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,bfloat16,bfloat16,63,0.1468448042869568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,bfloat16,fp8,63,0.17187999486923217
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,bfloat16,bfloat16,127,0.15685759782791137
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,bfloat16,1,0.01191679984331131
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,bfloat16,fp8,127,0.1866703987121582
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,bfloat16,fp8,1,0.023712000250816344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,fp8,3,0.012715199589729309
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,bfloat16,7,0.012110400199890136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,bfloat16,fp8,1,0.038980799913406375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,fp8,7,0.012771199643611907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,bfloat16,15,0.012139199674129486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,fp8,15,0.012839999794960023
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,bfloat16,31,0.012107200175523757
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,fp8,31,0.01281919926404953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,bfloat16,63,0.012051200121641159
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,fp8,63,0.012763200700283051
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,bfloat16,127,0.012049599736928939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,fp8,127,0.012705600261688233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,bfloat16,255,0.013779200613498688
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,fp8,255,0.01451520025730133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,bfloat16,511,0.01671999990940094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,fp8,511,0.018404799699783325
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,bfloat16,1023,0.017123199999332428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,fp8,1023,0.018063999712467194
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,bfloat16,2047,0.018190400302410127
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,fp8,2047,0.018636800348758698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,bfloat16,1,0.011764799803495407
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,bfloat16,3,0.011811199784278869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,fp8,3,0.012516799569129943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,bfloat16,7,0.011772800236940384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,fp8,7,0.012606400251388549
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,bfloat16,15,0.012011200189590454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,fp8,15,0.012622399628162384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,bfloat16,31,0.012011200189590454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,fp8,31,0.012705600261688233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,bfloat16,63,0.011913599818944931
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,fp8,63,0.012617599964141846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,bfloat16,127,0.012238399684429168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,fp8,127,0.012577599287033081
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,bfloat16,255,0.01371999979019165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,bfloat16,511,0.016638399660587312
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,fp8,511,0.018352000415325163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,bfloat16,1023,0.016982400417327882
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,fp8,1023,0.01775359958410263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,bfloat16,2047,0.01886879950761795
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,fp8,2047,0.018457600474357606
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,bfloat16,1,0.01202400028705597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,fp8,1,0.012718400359153748
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,bfloat16,bfloat16,15,0.1424847960472107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,bfloat16,3,0.01202239990234375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,fp8,3,0.012559999525547028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,bfloat16,7,0.012107200175523757
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,fp8,7,0.012582400441169738
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,bfloat16,15,0.012107200175523757
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,fp8,15,0.012556800246238708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,bfloat16,31,0.012124799937009812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,fp8,31,0.012542399764060973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,bfloat16,63,0.012148799747228623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,fp8,63,0.012724800407886505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,bfloat16,127,0.012319999933242797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,fp8,127,0.01271200031042099
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,bfloat16,255,0.013849599659442902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,bfloat16,3,0.011857599765062333
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,bfloat16,fp8,1,0.012695999443531036
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,fp8,255,0.014604799449443817
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,fp8,511,0.018377600610256194
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,bfloat16,1023,0.01804479956626892
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,fp8,1023,0.0174575999379158
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,bfloat16,2047,0.020524799823760986
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,fp8,2047,0.021296000480651854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,bfloat16,1,0.012110400199890136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,fp8,1,0.012966400384902954
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,fp8,3,0.01284639984369278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,bfloat16,7,0.012223999947309494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,fp8,7,0.012878400087356568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,bfloat16,15,0.012241599708795547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,fp8,15,0.01290079951286316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,bfloat16,31,0.01228640004992485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,fp8,31,0.012943999469280243
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,bfloat16,63,0.012240000069141388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,bfloat16,bfloat16,1,0.05005279779434204
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,bfloat16,127,0.012415999919176102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,bfloat16,255,0.013894400000572205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,fp8,255,0.014691199362277984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,fp8,1,0.012600000202655792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,bfloat16,511,0.017049600183963776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,fp8,511,0.018607999384403228
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,bfloat16,1023,0.020080000162124634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,fp8,1023,0.021135999262332915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,bfloat16,2047,0.038134399056434634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,fp8,2047,0.02720479965209961
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,bfloat16,bfloat16,1,0.06775040030479432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,bfloat16,fp8,1,0.07075520157814026
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,bfloat16,bfloat16,3,0.06817600131034851
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,bfloat16,fp8,3,0.07070879936218262
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,bfloat16,fp8,255,0.014460800588130951
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,bfloat16,bfloat16,7,0.0685696005821228
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,bfloat16,fp8,7,0.07110239863395691
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,bfloat16,fp8,15,0.07087680101394653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,bfloat16,bfloat16,31,0.07047200202941895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,bfloat16,fp8,31,0.07131839990615844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,bfloat16,bfloat16,63,0.07268639802932739
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,bfloat16,fp8,63,0.07293599843978882
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,bfloat16,bfloat16,1,0.09394239783287048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,bfloat16,fp8,1,0.10444320440292358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,bfloat16,bfloat16,3,0.0946016013622284
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,bfloat16,fp8,3,0.10461599826812744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,bfloat16,bfloat16,7,0.09472640156745911
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,bfloat16,fp8,7,0.10525599718093873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,bfloat16,bfloat16,15,0.09559040069580078
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,bfloat16,fp8,15,0.10567359924316407
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,bfloat16,bfloat16,31,0.0968559980392456
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,bfloat16,bfloat16,511,0.016908800601959227
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,bfloat16,fp8,31,0.10656319856643677
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,bfloat16,bfloat16,63,0.10052160024642945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,bfloat16,fp8,63,0.10878560543060303
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,bfloat16,3,0.012171199917793274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,bfloat16,bfloat16,1,0.15023360252380372
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,bfloat16,fp8,1,0.17241439819335938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,bfloat16,bfloat16,3,0.1495743989944458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,bfloat16,bfloat16,7,0.1505887985229492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,bfloat16,fp8,7,0.1727887988090515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,bfloat16,fp8,3,0.1731328010559082
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,bfloat16,bfloat16,15,0.1506592035293579
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,fp8,63,0.012742400169372559
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,bfloat16,fp8,15,0.17482240200042726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,bfloat16,fp8,127,0.01308320015668869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,bfloat16,bfloat16,31,0.1525488018989563
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,bfloat16,bfloat16,63,0.15610239505767823
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,bfloat16,fp8,31,0.17545119524002076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,bfloat16,fp8,63,0.1796064019203186
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,bfloat16,bfloat16,1,0.2675343990325928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,bfloat16,bfloat16,3,0.26474239826202395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,bfloat16,bfloat16,7,0.2658735990524292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,bfloat16,fp8,1,0.3176336050033569
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,bfloat16,fp8,3,0.31451680660247805
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,bfloat16,fp8,7,0.3157552003860474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,bfloat16,bfloat16,15,0.26859679222106936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,bfloat16,fp8,15,0.31406559944152834
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,bfloat16,bfloat16,31,0.26917600631713867
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,bfloat16,bfloat16,1,0.11771520376205444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,bfloat16,bfloat16,63,0.27332639694213867
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,bfloat16,fp8,31,0.3202176094055176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,bfloat16,fp8,63,0.3206991910934448
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,bfloat16,fp8,1,0.12538880109786987
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,bfloat16,bfloat16,3,0.11780799627304077
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,bfloat16,fp8,3,0.12592639923095703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,bfloat16,bfloat16,7,0.11867040395736694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,bfloat16,bfloat16,15,0.06819040179252625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,bfloat16,fp8,7,0.12626880407333374
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,bfloat16,fp8,15,0.12594560384750367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,bfloat16,bfloat16,31,0.1211967945098877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,bfloat16,fp8,31,0.12758560180664064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,bfloat16,bfloat16,1,0.168614399433136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,bfloat16,bfloat16,3,0.17034720182418822
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,bfloat16,fp8,1,0.19230079650878906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,bfloat16,fp8,3,0.19099040031433107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,bfloat16,bfloat16,7,0.16989760398864745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,bfloat16,fp8,7,0.19275519847869874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,bfloat16,bfloat16,15,0.17096480131149291
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,bfloat16,fp8,15,0.19402400255203248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,bfloat16,bfloat16,31,0.1737264037132263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,bfloat16,fp8,31,0.1938223958015442
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,bfloat16,bfloat16,1,0.27935519218444826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,bfloat16,bfloat16,3,0.27819199562072755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,bfloat16,fp8,1,0.32787840366363524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,bfloat16,bfloat16,7,0.281060791015625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,bfloat16,fp8,3,0.3295680046081543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,bfloat16,fp8,7,0.3271375894546509
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,bfloat16,bfloat16,15,0.27954399585723877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,bfloat16,bfloat16,31,0.2841439962387085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,bfloat16,fp8,31,0.33183040618896487
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,bfloat16,bfloat16,1,0.5131792068481446
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,bfloat16,bfloat16,3,0.5081200122833252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,bfloat16,fp8,1,0.5995488166809082
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,bfloat16,fp8,3,0.6008319854736328
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,bfloat16,bfloat16,7,0.5089712142944336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,bfloat16,fp8,7,0.6101088047027587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,bfloat16,bfloat16,15,0.5099696159362793
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,bfloat16,1,0.012624000012874604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,fp8,1,0.013099199533462525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,bfloat16,fp8,15,0.6007952213287353
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,bfloat16,3,0.012571200728416443
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,fp8,3,0.01313920021057129
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,bfloat16,7,0.012166400253772736
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,fp8,7,0.01308799982070923
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,bfloat16,15,0.01228799968957901
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,fp8,15,0.012804800271987915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,bfloat16,fp8,31,0.6090176105499268
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,bfloat16,31,0.012044800072908401
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,fp8,31,0.013099199533462525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,bfloat16,63,0.012132800370454788
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,bfloat16,127,0.012308800220489502
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,fp8,63,0.012732799351215362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,bfloat16,255,0.0139055997133255
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,fp8,255,0.014552000164985656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,bfloat16,511,0.01691199988126755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,fp8,511,0.01833759993314743
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,fp8,1023,0.018935999274253844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,bfloat16,2047,0.02014559954404831
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,fp8,2047,0.020304000377655028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,bfloat16,1,0.011806400120258331
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,fp8,1,0.012724800407886505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,bfloat16,3,0.011800000071525573
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,fp8,3,0.012753599882125854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,bfloat16,7,0.011806400120258331
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,fp8,7,0.013036799430847169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,bfloat16,bfloat16,15,0.12010879516601562
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,bfloat16,15,0.011952000111341477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,fp8,15,0.012835200130939483
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,bfloat16,31,0.012267199903726577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,fp8,31,0.012916800379753113
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,bfloat16,63,0.01226079985499382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,fp8,63,0.013142399489879608
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,bfloat16,127,0.012272000312805176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,fp8,127,0.013060800731182098
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,bfloat16,255,0.014148800075054169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,fp8,255,0.0147599995136261
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,bfloat16,511,0.01736160069704056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,fp8,511,0.01852799952030182
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,bfloat16,1023,0.018787199258804323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,fp8,1023,0.01865919977426529
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,bfloat16,2047,0.022228799760341644
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,bfloat16,fp8,2047,0.02194560021162033
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,bfloat16,1,0.012174399942159653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,fp8,1,0.012895999848842621
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,bfloat16,3,0.012062399834394454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,fp8,3,0.012894399464130402
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,bfloat16,7,0.012219200283288956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,fp8,7,0.012695999443531036
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,bfloat16,15,0.012294399738311767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,fp8,15,0.012883199751377106
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,bfloat16,31,0.012049599736928939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,fp8,31,0.012971200048923492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,bfloat16,63,0.012089599668979645
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,fp8,63,0.013048000633716583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,bfloat16,127,0.012454400211572647
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,fp8,127,0.012887999415397644
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,bfloat16,255,0.01406240016222
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,bfloat16,fp8,15,0.32881920337677
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,fp8,511,0.018278400599956512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,bfloat16,1023,0.0204352006316185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,fp8,1023,0.021161599457263945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,bfloat16,2047,0.038815999031066896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,fp8,2047,0.02748799920082092
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,bfloat16,1,0.012348800152540206
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,fp8,1,0.01284160017967224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,bfloat16,3,0.012328000366687774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,fp8,3,0.012798400223255157
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,bfloat16,7,0.012252800166606903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,fp8,7,0.012868799269199371
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,bfloat16,15,0.012292800098657608
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,fp8,15,0.012852799892425538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,bfloat16,31,0.012353599816560746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,fp8,31,0.012907199561595917
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,bfloat16,63,0.012332800030708312
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,fp8,63,0.013043199479579926
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,fp8,127,0.012908799946308136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,bfloat16,255,0.01417119950056076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,bfloat16,bfloat16,31,0.5150063991546631
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,bfloat16,511,0.017136000096797943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,fp8,511,0.018248000741004945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,fp8,127,0.012984000146389008
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,bfloat16,1023,0.036620798707008365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,fp8,1023,0.026480001211166383
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,bfloat16,bfloat16,1023,0.01797440052032471
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,bfloat16,2047,0.05843520164489746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,fp8,2047,0.041345599293708804
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,bfloat16,1,0.01326880007982254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,fp8,1,0.013844799995422364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,bfloat16,3,0.013211199641227722
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,fp8,3,0.014020800590515137
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,fp8,7,0.014084799587726593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,bfloat16,15,0.013193599879741669
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,fp8,15,0.014115199446678162
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,bfloat16,31,0.013191999495029449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,fp8,31,0.0141184002161026
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,bfloat16,63,0.013251200318336487
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,fp8,63,0.014238399267196656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,bfloat16,127,0.013601599633693695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,fp8,127,0.014280000329017639
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,bfloat16,255,0.015427200496196747
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,fp8,255,0.016212800145149232
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,bfloat16,511,0.018486399948596955
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,fp8,511,0.01966560035943985
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,bfloat16,1023,0.030052798986434936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,fp8,1023,0.02529279887676239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,bfloat16,2047,0.044865599274635314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,fp8,2047,0.034683200716972354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,bfloat16,1,0.013300800323486328
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,fp8,1,0.014163200557231904
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,bfloat16,3,0.013372799754142762
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,fp8,3,0.014259199798107147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,bfloat16,7,0.01329759955406189
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,fp8,7,0.014238399267196656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,bfloat16,15,0.013372799754142762
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,bfloat16,511,0.017019200325012206
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,bfloat16,31,0.013371199369430542
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,fp8,31,0.014217600226402283
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,bfloat16,63,0.013337600231170654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,fp8,63,0.014164799451828003
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,bfloat16,127,0.013510400056838989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,fp8,127,0.014127999544143677
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,bfloat16,255,0.01515199989080429
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,fp8,255,0.01600160002708435
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,bfloat16,511,0.018857599794864656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,fp8,511,0.01961600035429001
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,bfloat16,1023,0.039103999733924866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,fp8,1023,0.02873919904232025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,bfloat16,127,0.012591999769210816
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,bfloat16,2047,0.06129119992256164
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,fp8,2047,0.0430976003408432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,bfloat16,1,0.01717599928379059
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,bfloat16,fp8,255,0.014953599870204925
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,fp8,1,0.01871519982814789
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,bfloat16,3,0.01717119961977005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,fp8,3,0.01865759938955307
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,fp8,7,0.018540799617767334
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,bfloat16,15,0.01716960072517395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,fp8,15,0.01852640062570572
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,bfloat16,31,0.017107200622558594
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,fp8,31,0.01852319985628128
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,bfloat16,63,0.01712000072002411
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,fp8,63,0.018544000387191773
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,bfloat16,127,0.01724800020456314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,fp8,127,0.018512000143527985
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,bfloat16,255,0.021751999855041504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,fp8,255,0.022364799678325654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,bfloat16,bfloat16,7,0.013051199913024902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,bfloat16,511,0.039192000031471254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,fp8,511,0.03051519989967346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,fp8,1023,0.04726560115814209
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,bfloat16,2047,0.10889279842376709
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,fp8,2047,0.07121599912643432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,bfloat16,1,0.024758400022983552
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,fp8,1,0.027457600831985472
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,bfloat16,3,0.02446720004081726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,fp8,3,0.02736159861087799
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,bfloat16,7,0.02481600046157837
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,fp8,7,0.027804800868034364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,bfloat16,15,0.02447039932012558
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,fp8,15,0.027526399493217467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,bfloat16,31,0.024460799992084503
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,fp8,31,0.0273391991853714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,bfloat16,63,0.02489120066165924
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,fp8,63,0.027371200919151305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,bfloat16,127,0.02590239942073822
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,fp8,127,0.02757120132446289
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,bfloat16,255,0.04448800086975098
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,fp8,255,0.03605920076370239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,bfloat16,511,0.06560479998588561
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,fp8,511,0.05288640260696411
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,bfloat16,1023,0.11114239692687988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,bfloat16,fp8,255,0.01499200016260147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,bfloat16,fp8,15,0.01417279988527298
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,bfloat16,1,0.011393599957227708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,bfloat16,2047,0.19652960300445557
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,bfloat16,3,0.011512000113725662
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,fp8,1,0.012478400021791458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,fp8,2047,0.1265679955482483
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,bfloat16,7,0.011500799655914306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,fp8,7,0.01249919980764389
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,fp8,3,0.012350399792194367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,bfloat16,15,0.011615999788045884
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,fp8,15,0.012441600114107132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,bfloat16,31,0.011776000261306763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,fp8,31,0.012406399846076966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,bfloat16,63,0.011648000031709672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,fp8,63,0.012403199821710587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,bfloat16,127,0.011791999638080596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,fp8,127,0.01239359974861145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,bfloat16,255,0.013254399597644805
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,fp8,255,0.014424000680446625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,bfloat16,511,0.016364799439907075
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,fp8,511,0.01833759993314743
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,fp8,1023,0.017396800220012665
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,bfloat16,2047,0.016833600401878358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,fp8,2047,0.017907199263572694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,bfloat16,1,0.011390399932861329
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,fp8,1,0.012319999933242797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,bfloat16,3,0.011483199894428253
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,fp8,3,0.012116800248622894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,bfloat16,7,0.011558400094509124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,fp8,7,0.0122079998254776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,bfloat16,15,0.01143840029835701
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,fp8,15,0.012476799637079239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,bfloat16,31,0.011390399932861329
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,fp8,31,0.012206400185823441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,bfloat16,63,0.011665599793195725
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,fp8,63,0.012275200337171555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,bfloat16,127,0.011443199962377549
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,fp8,127,0.012503999471664428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,bfloat16,255,0.013072000443935394
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,fp8,255,0.014305600523948669
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,bfloat16,511,0.016436800360679626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,fp8,511,0.01814880073070526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,bfloat16,1023,0.016120000183582305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,bfloat16,7,0.01725600063800812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,bfloat16,2047,0.01643040031194687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,fp8,2047,0.01743520051240921
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,bfloat16,1,0.01168000027537346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,fp8,1,0.012247999757528305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,bfloat16,3,0.011488000303506852
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,fp8,3,0.012379200011491776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,bfloat16,7,0.011435200273990632
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,fp8,7,0.01223199963569641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,bfloat16,15,0.011580800265073776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,fp8,15,0.012294399738311767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,bfloat16,31,0.011512000113725662
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,fp8,31,0.012238399684429168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,bfloat16,63,0.011585599929094314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,fp8,63,0.012291199713945388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,bfloat16,127,0.011691199988126755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,fp8,127,0.012292800098657608
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,bfloat16,255,0.013321599364280701
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,fp8,255,0.014287999272346497
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,bfloat16,511,0.016223999857902526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,fp8,511,0.018092800676822663
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,bfloat16,1023,0.015915200114250183
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,bfloat16,bfloat16,1023,0.06452959775924683
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,fp8,1023,0.01687680035829544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,bfloat16,2047,0.016249600052833556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,bfloat16,1,0.011432000249624253
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,bfloat16,fp8,2047,0.01733119934797287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,fp8,1,0.012368000298738479
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,bfloat16,3,0.011422400176525117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,fp8,3,0.012222400307655335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,bfloat16,7,0.011531200259923935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,fp8,7,0.012203200161457062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,bfloat16,15,0.0115167997777462
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,fp8,15,0.0124208003282547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,bfloat16,31,0.011417599767446518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,fp8,31,0.012206400185823441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,bfloat16,63,0.0117296002805233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,fp8,63,0.012230399996042252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,bfloat16,127,0.011400000005960465
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,fp8,127,0.012385600060224534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,bfloat16,255,0.013240000605583191
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,fp8,255,0.01417119950056076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,bfloat16,511,0.016281600296497344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,fp8,511,0.017761600017547608
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,bfloat16,1023,0.015910400450229643
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,fp8,1023,0.017080000042915343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,bfloat16,2047,0.01658719927072525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,bfloat16,fp8,2047,0.01723040044307709
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,bfloat16,1,0.011659199744462967
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,fp8,1,0.01250080019235611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,bfloat16,3,0.01165120005607605
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,fp8,3,0.01236959993839264
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,bfloat16,7,0.01175519973039627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,fp8,7,0.0123648002743721
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,bfloat16,15,0.011713600158691407
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,fp8,15,0.012342400103807449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,bfloat16,31,0.011662399768829346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,bfloat16,fp8,1023,0.07938719987869262
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,fp8,31,0.012449599802494049
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,bfloat16,63,0.011552000045776367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,fp8,63,0.012401600182056428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,fp8,127,0.012428800016641617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,bfloat16,255,0.013123199343681335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,bfloat16,127,0.011745599657297134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,bfloat16,511,0.016278399527072905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,fp8,511,0.018270400166511536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,bfloat16,1023,0.016625599563121797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,fp8,1023,0.017696000635623932
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,bfloat16,2047,0.016808000206947327
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,fp8,2047,0.017868800461292265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,bfloat16,1,0.011636800318956374
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,fp8,1,0.01228640004992485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,bfloat16,3,0.011695999652147293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,fp8,3,0.012411200255155564
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,bfloat16,7,0.011484800279140473
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,fp8,7,0.01250240057706833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,bfloat16,15,0.011655999720096588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,bfloat16,31,0.011579199880361556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,bfloat16,bfloat16,1023,0.01648319959640503
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,fp8,31,0.012489599734544754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,bfloat16,63,0.01173280030488968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,fp8,63,0.012257599830627441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,fp8,127,0.012427199631929398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,bfloat16,127,0.011555200070142746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,bfloat16,255,0.013372799754142762
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,fp8,255,0.014270399510860444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,bfloat16,511,0.016358399391174318
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,bfloat16,1023,0.01626240015029907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,fp8,1023,0.017158399522304534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,bfloat16,2047,0.016499200463294984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,fp8,2047,0.017641599476337432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,bfloat16,1,0.01146399974822998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,fp8,1,0.012358400225639343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,fp8,3,0.012292800098657608
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,bfloat16,7,0.011582399904727935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,fp8,7,0.012331199645996094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,bfloat16,15,0.011708799749612808
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,fp8,15,0.012211199849843979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,bfloat16,fp8,1023,0.017292800545692443
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,bfloat16,31,0.011721599847078323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,fp8,31,0.012305600196123123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,bfloat16,63,0.011739200353622437
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,fp8,63,0.01239679977297783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,bfloat16,127,0.011687999963760376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,fp8,127,0.01242239996790886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,bfloat16,255,0.013283200562000275
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,bfloat16,511,0.016233600676059723
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,fp8,511,0.018025599420070648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,bfloat16,1023,0.015958400070667268
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,fp8,1023,0.017059199512004852
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,bfloat16,2047,0.016862399876117706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,fp8,2047,0.017548799514770508
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,bfloat16,1,0.011919999867677689
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,fp8,1,0.012724800407886505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,bfloat16,3,0.011896000057458878
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,fp8,3,0.012667199969291687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,bfloat16,7,0.011671999841928482
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,fp8,7,0.012459199875593185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,bfloat16,15,0.011695999652147293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,fp8,15,0.012337599694728852
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,bfloat16,31,0.011695999652147293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,fp8,31,0.01242400035262108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,bfloat16,63,0.011772800236940384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,fp8,63,0.01249919980764389
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,bfloat16,127,0.011832000315189361
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,fp8,127,0.012495999783277511
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,bfloat16,255,0.013761599361896516
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,bfloat16,fp8,255,0.014280000329017639
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,bfloat16,511,0.016283200681209566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,fp8,511,0.017936000227928163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,bfloat16,1023,0.01603199988603592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,fp8,1023,0.017411200702190398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,fp8,15,0.0124208003282547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,bfloat16,2047,0.017868800461292265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,fp8,2047,0.01775840073823929
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,bfloat16,1,0.012921600043773651
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,fp8,1,0.013473600149154663
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,bfloat16,3,0.01297920048236847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,bfloat16,fp8,511,0.017918400466442108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,fp8,3,0.0133200004696846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,bfloat16,7,0.012931199371814727
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,fp8,7,0.013356800377368926
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,bfloat16,15,0.012649600207805634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,fp8,15,0.013729600608348847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,bfloat16,31,0.012566399574279786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,bfloat16,3,0.011587200313806533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,fp8,31,0.013494400680065155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,bfloat16,63,0.012980799376964568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,fp8,63,0.0134320005774498
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,fp8,127,0.013526399433612824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,bfloat16,127,0.012880000472068786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,fp8,255,0.015171200037002563
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,bfloat16,511,0.017752000689506532
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,fp8,511,0.01910240054130554
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,bfloat16,1023,0.02036159932613373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,fp8,1023,0.020185600221157073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,bfloat16,2047,0.025268799066543578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,fp8,2047,0.023580799996852874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,bfloat16,fp8,255,0.014350399374961853
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,fp8,1,0.013463999330997466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,bfloat16,3,0.012518399953842163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,fp8,3,0.01340479999780655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,bfloat16,7,0.012990400195121765
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,bfloat16,15,0.012572799623012543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,fp8,15,0.013350400328636169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,bfloat16,31,0.012905600666999816
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,fp8,31,0.013252800703048706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,bfloat16,63,0.012547199428081513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,fp8,63,0.013708800077438354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,bfloat16,127,0.012814399600028992
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,fp8,127,0.013230399787425995
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,bfloat16,255,0.01494240015745163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,fp8,255,0.015249599516391755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,bfloat16,511,0.017459200322628023
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,fp8,511,0.01894879937171936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,bfloat16,1023,0.02178879976272583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,fp8,1023,0.022331200540065765
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,bfloat16,2047,0.04077439904212952
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,fp8,2047,0.029185599088668822
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,bfloat16,1,0.012615999579429627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,fp8,1,0.013633599877357483
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,bfloat16,7,0.012782399356365205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,fp8,7,0.013489599525928497
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,bfloat16,15,0.012838399410247803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,fp8,15,0.013572800159454345
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,bfloat16,31,0.012720000743865967
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,fp8,31,0.013579200208187103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,bfloat16,63,0.012969599664211273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,fp8,63,0.01363999992609024
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,bfloat16,127,0.013206399977207184
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,fp8,127,0.01361120045185089
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,bfloat16,fp8,255,0.014521600306034088
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,bfloat16,255,0.014983999729156493
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,bfloat16,511,0.018067200481891633
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,fp8,255,0.015715199708938598
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,fp8,511,0.01902880072593689
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,bfloat16,1023,0.03824959993362427
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,fp8,1023,0.026655998826026917
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,bfloat16,2047,0.05990399718284607
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,fp8,2047,0.04037440121173859
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,bfloat16,1,0.01679839938879013
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,fp8,1,0.017888000607490538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,bfloat16,3,0.0168272003531456
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,fp8,3,0.0179407998919487
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,bfloat16,7,0.016832000017166136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,fp8,7,0.01801439970731735
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,bfloat16,15,0.016913600265979767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,fp8,15,0.018025599420070648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,bfloat16,31,0.01690559983253479
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,fp8,31,0.018111999332904815
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,bfloat16,bfloat16,255,0.014841599762439728
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,bfloat16,63,0.016784000396728515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,fp8,63,0.017907199263572694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,bfloat16,127,0.017025600373744964
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,fp8,127,0.01789119988679886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,bfloat16,255,0.02107519954442978
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,bfloat16,1,0.012636800110340119
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,fp8,255,0.02195200026035309
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,bfloat16,511,0.037492799758911136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,bfloat16,fp8,7,0.013312000036239623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,bfloat16,1023,0.06135680079460144
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,fp8,1023,0.045121601223945616
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,bfloat16,1,0.011644800007343293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,fp8,2047,0.07076960206031799
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,fp8,1,0.012331199645996094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,bfloat16,3,0.011665599793195725
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,fp8,3,0.01239520013332367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,bfloat16,7,0.011739200353622437
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,fp8,7,0.012452799826860428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,bfloat16,15,0.01170559972524643
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,fp8,15,0.012443199753761292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,bfloat16,31,0.011774399876594543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,fp8,31,0.012415999919176102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,bfloat16,63,0.011806400120258331
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,fp8,63,0.012457600235939026
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,bfloat16,127,0.011740799993276596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,fp8,127,0.01247519999742508
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,bfloat16,255,0.013407999277114868
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,fp8,255,0.01424960047006607
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,fp8,3,0.013507199287414551
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,bfloat16,511,0.016451199352741242
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,fp8,511,0.018236799538135527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,bfloat16,1023,0.01650079935789108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,bfloat16,2047,0.01690080016851425
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,fp8,2047,0.01796640008687973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,bfloat16,1,0.011644800007343293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,fp8,1,0.012302400171756744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,bfloat16,3,0.011523199826478958
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,fp8,3,0.012342400103807449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,bfloat16,7,0.011572799831628799
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,fp8,7,0.01228799968957901
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,bfloat16,15,0.01165279969573021
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,fp8,15,0.012275200337171555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,bfloat16,31,0.011627200245857238
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,fp8,31,0.012451200187206269
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,bfloat16,63,0.011638399958610535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,fp8,63,0.012417600303888322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,bfloat16,127,0.01175680011510849
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,fp8,127,0.012368000298738479
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,bfloat16,255,0.013235199451446533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,bfloat16,511,0.01641920059919357
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,fp8,511,0.018024000525474548
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,bfloat16,1023,0.01629119962453842
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,fp8,1023,0.01732960045337677
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,bfloat16,2047,0.016777600347995757
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,fp8,2047,0.017654399573802947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,bfloat16,1,0.01188800036907196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,fp8,1,0.012414400279521943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,bfloat16,3,0.011760000139474869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,fp8,3,0.012454400211572647
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,bfloat16,7,0.011633600294589996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,fp8,7,0.012460800260305405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,bfloat16,15,0.011633600294589996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,fp8,15,0.012511999905109405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,bfloat16,31,0.011657600104808808
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,fp8,31,0.012484800070524216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,bfloat16,63,0.01162400022149086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,fp8,63,0.01242400035262108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,bfloat16,127,0.011966399848461151
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,fp8,127,0.012411200255155564
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,bfloat16,255,0.01345600038766861
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,fp8,255,0.01433439999818802
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,bfloat16,511,0.016358399391174318
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,fp8,511,0.01807679980993271
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,bfloat16,1023,0.01629280000925064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,fp8,1023,0.017056000232696534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,bfloat16,2047,0.018055999279022218
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,bfloat16,fp8,2047,0.017688000202178956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,bfloat16,1,0.011811199784278869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,bfloat16,3,0.011774399876594543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,fp8,3,0.01257600039243698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,bfloat16,7,0.01178399994969368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,fp8,7,0.012529599666595458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,bfloat16,15,0.01178240031003952
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,fp8,511,0.029873600602149962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,fp8,15,0.01265919953584671
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,fp8,31,0.012756800651550293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,bfloat16,31,0.011908800154924393
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,bfloat16,63,0.011975999921560287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,bfloat16,bfloat16,2047,0.105404794216156
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,bfloat16,127,0.012055999785661697
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,fp8,127,0.01297599971294403
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,bfloat16,255,0.013699199259281158
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,fp8,255,0.014958399534225463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,fp8,511,0.01849599927663803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,bfloat16,1023,0.01798879951238632
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,bfloat16,bfloat16,3,0.012567999958992004
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,fp8,1023,0.017521600425243377
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,bfloat16,fp8,1023,0.01769919991493225
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,fp8,2047,0.02141920030117035
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,bfloat16,1,0.014843200147151948
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,fp8,1,0.015647999942302704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,bfloat16,3,0.014864000678062438
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,fp8,3,0.01584160029888153
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,bfloat16,7,0.01480800062417984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,fp8,7,0.015652799606323244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,bfloat16,15,0.014632000029087067
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,fp8,15,0.01579200029373169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,bfloat16,31,0.014924800395965577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,fp8,31,0.015795199573040007
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,bfloat16,63,0.014931200444698334
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,fp8,63,0.01576800048351288
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,bfloat16,127,0.015326400101184846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,fp8,127,0.015607999265193939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,bfloat16,255,0.018110400438308714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,fp8,255,0.017820799350738527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,bfloat16,511,0.022580799460411072
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,fp8,511,0.02141280025243759
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,bfloat16,1023,0.04191839993000031
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,fp8,1,0.012758399546146392
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,bfloat16,1,0.018353599309921264
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,fp8,1,0.02003519982099533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,bfloat16,3,0.018321600556373597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,fp8,3,0.019864000380039215
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,bfloat16,7,0.01845120042562485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,fp8,7,0.019908800721168518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,bfloat16,15,0.018457600474357606
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,fp8,63,0.012723200023174286
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,fp8,15,0.019828799366950988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,bfloat16,31,0.018406400084495546
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,bfloat16,63,0.018367999792099
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,fp8,31,0.02030559927225113
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,fp8,63,0.02012320011854172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,bfloat16,127,0.01860000044107437
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,fp8,127,0.020414400100708007
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,bfloat16,255,0.025753599405288697
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,fp8,255,0.023951999843120575
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,bfloat16,511,0.042467200756073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,fp8,511,0.03272480070590973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,bfloat16,1023,0.067958402633667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,bfloat16,1,0.026100799441337585
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,bfloat16,fp8,1023,0.05044639706611633
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,bfloat16,2047,0.020185600221157073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,fp8,1,0.028908801078796387
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,bfloat16,3,0.026097598671913146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,fp8,3,0.02932800054550171
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,bfloat16,7,0.02603999972343445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,fp8,7,0.028984001278877257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,bfloat16,15,0.02614560127258301
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,fp8,15,0.029259198904037477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,bfloat16,31,0.026030400395393373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,fp8,31,0.029057601094245912
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,bfloat16,63,0.026182401180267333
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,bfloat16,127,0.029161599278450013
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,fp8,127,0.02901279926300049
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,bfloat16,fp8,255,0.01438400000333786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,fp8,255,0.03854719996452331
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,bfloat16,511,0.068259197473526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,fp8,511,0.05472480058670044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,bfloat16,1023,0.11423039436340332
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,bfloat16,1,0.04089440107345581
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,fp8,1023,0.08102239966392517
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,bfloat16,3,0.040884798765182494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,fp8,1,0.04653440117835998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,fp8,3,0.046507200598716734
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,bfloat16,7,0.04071680009365082
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,fp8,7,0.04669280052185058
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,fp8,15,0.046859198808670045
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,bfloat16,31,0.04060640037059784
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,fp8,31,0.04689759910106659
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,bfloat16,63,0.043024000525474546
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,fp8,63,0.046854400634765626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,bfloat16,127,0.04952160120010376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,fp8,127,0.049937599897384645
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,bfloat16,255,0.07478079795837403
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,fp8,255,0.06736800074577332
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,bfloat16,511,0.1178447961807251
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,fp8,511,0.09573919773101806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,bfloat16,fp8,1023,0.03244479894638062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,bfloat16,1,0.021929599344730377
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,bfloat16,1023,0.20618720054626466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,fp8,1023,0.14678879976272582
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,fp8,1,0.023612800240516662
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,fp8,3,0.023340800404548646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,bfloat16,3,0.02154559940099716
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,bfloat16,7,0.02160640060901642
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,fp8,7,0.0228752002120018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,bfloat16,15,0.02120800018310547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,fp8,15,0.023622399568557738
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,bfloat16,31,0.021139200031757354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,fp8,31,0.02274080067873001
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,bfloat16,63,0.021803200244903564
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,fp8,63,0.022915199398994446
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,bfloat16,127,0.02202720046043396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,fp8,127,0.023507200181484222
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,bfloat16,255,0.03447040021419525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,bfloat16,fp8,255,0.02724800109863281
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,bfloat16,1,0.02852639853954315
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,fp8,1,0.031465598940849306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,bfloat16,3,0.028273600339889526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,fp8,3,0.03205919861793518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,fp8,7,0.03123520016670227
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,bfloat16,15,0.02874079942703247
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,fp8,15,0.03136799931526184
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,bfloat16,31,0.028731200098991393
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,fp8,31,0.03129920065402984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,fp8,63,0.03130080103874207
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,fp8,63,0.029318401217460634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,bfloat16,127,0.03781439960002899
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,bfloat16,bfloat16,255,0.04694559872150421
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,bfloat16,255,0.049297600984573364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,fp8,255,0.04277600049972534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,fp8,1,0.049472001194953916
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,bfloat16,3,0.04320639967918396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,fp8,3,0.04932479858398438
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,bfloat16,bfloat16,15,0.040561598539352414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,bfloat16,7,0.04314239919185638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,fp8,7,0.0491456001996994
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,bfloat16,15,0.04321439862251282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,fp8,15,0.04919199943542481
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,bfloat16,31,0.04333600103855133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,fp8,31,0.049272000789642334
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,bfloat16,63,0.04912799894809723
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,fp8,63,0.049779200553894044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,bfloat16,127,0.054660797119140625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,fp8,127,0.05644000172615051
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,bfloat16,255,0.07632319927215576
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,fp8,255,0.07067520022392274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,bfloat16,1,0.07127519845962524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,fp8,1,0.0845359981060028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,bfloat16,3,0.07129600048065185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,fp8,3,0.084307199716568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,bfloat16,7,0.07122240066528321
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,fp8,7,0.08436639904975891
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,bfloat16,15,0.0714303970336914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,fp8,15,0.08448960185050965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,bfloat16,31,0.07326880097389221
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,fp8,31,0.08448640108108521
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,bfloat16,63,0.07989119887351989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,fp8,63,0.090939199924469
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,bfloat16,127,0.08643839955329895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,fp8,127,0.09926080107688903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,bfloat16,7,0.02823520004749298
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,bfloat16,255,0.12785600423812865
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,bfloat16,bfloat16,1,0.03495680093765259
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,bfloat16,63,0.028763198852539064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,bfloat16,fp8,1,0.0381632000207901
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,bfloat16,bfloat16,3,0.03496319949626923
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,bfloat16,fp8,3,0.03829599916934967
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,bfloat16,fp8,7,0.038020798563957216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,bfloat16,bfloat16,15,0.035630398988723756
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,bfloat16,fp8,15,0.03813759982585907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,bfloat16,fp8,127,0.03189600110054016
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,bfloat16,bfloat16,31,0.035699200630187986
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,bfloat16,fp8,31,0.03826879858970642
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,bfloat16,bfloat16,63,0.039776000380516055
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,bfloat16,bfloat16,1,0.04265280067920685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,bfloat16,fp8,63,0.03949280083179474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,bfloat16,bfloat16,127,0.04579040110111236
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,bfloat16,fp8,127,0.04002079963684082
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,bfloat16,fp8,1,0.05510240197181702
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,bfloat16,bfloat16,3,0.04901919960975647
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,bfloat16,fp8,3,0.05479360222816467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,bfloat16,bfloat16,7,0.04933600127696991
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,bfloat16,fp8,7,0.05505440235137939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,bfloat16,bfloat16,15,0.04897119998931885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,bfloat16,fp8,15,0.055320000648498534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,bfloat16,bfloat16,31,0.05031520128250122
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,bfloat16,fp8,31,0.05507519841194153
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,bfloat16,bfloat16,63,0.05770080089569092
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,bfloat16,fp8,63,0.057015997171401975
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,bfloat16,bfloat16,127,0.06303359866142273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,bfloat16,fp8,127,0.06305760145187378
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,bfloat16,bfloat16,1,0.07679839730262757
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,bfloat16,fp8,1,0.08985279798507691
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,bfloat16,bfloat16,3,0.077702397108078
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,bfloat16,fp8,3,0.08982719779014588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,bfloat16,bfloat16,7,0.0765999972820282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,bfloat16,fp8,7,0.09051679968833923
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,bfloat16,bfloat16,15,0.07786399722099305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,bfloat16,fp8,15,0.08994719982147217
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,bfloat16,bfloat16,31,0.0841152012348175
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,bfloat16,fp8,31,0.09118239879608155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,bfloat16,bfloat16,63,0.08670880198478699
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,bfloat16,fp8,63,0.09695199728012086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,bfloat16,bfloat16,127,0.09525120258331299
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,bfloat16,fp8,127,0.10292479991912842
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,bfloat16,bfloat16,1,0.13057279586791992
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,bfloat16,fp8,1,0.15748159885406493
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,bfloat16,bfloat16,3,0.13169280290603638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,bfloat16,fp8,3,0.15667840242385864
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,bfloat16,bfloat16,7,0.13240959644317626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,bfloat16,fp8,7,0.1565551996231079
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,bfloat16,bfloat16,15,0.14024800062179565
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,bfloat16,fp8,15,0.15669440031051635
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,bfloat16,bfloat16,511,0.016667200624942778
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,bfloat16,fp8,31,0.16522400379180907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,bfloat16,bfloat16,63,0.1458415985107422
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,bfloat16,1,0.012118399888277055
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,bfloat16,fp8,63,0.17161920070648193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,bfloat16,bfloat16,127,0.16038880348205567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,bfloat16,fp8,127,0.1861616015434265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,bfloat16,3,0.011875200271606445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,fp8,3,0.012535999715328216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,bfloat16,7,0.011913599818944931
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,fp8,7,0.012531200051307678
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,bfloat16,15,0.011998400092124939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,fp8,15,0.0126351997256279
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,bfloat16,bfloat16,7,0.035025599598884585
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,bfloat16,31,0.011884800344705581
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,fp8,31,0.012620800733566284
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,bfloat16,63,0.012036799639463424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,bfloat16,127,0.011913599818944931
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,fp8,127,0.01268640011548996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,bfloat16,255,0.013627199828624726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,fp8,255,0.014932799339294433
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,bfloat16,511,0.016513599455356597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,fp8,511,0.018321600556373597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,bfloat16,1023,0.016777600347995757
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,fp8,1023,0.01791200041770935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,bfloat16,2047,0.017608000338077544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,bfloat16,bfloat16,1,0.04880320131778717
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,fp8,2047,0.01823839992284775
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,fp8,1,0.012807999551296235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,bfloat16,3,0.012055999785661697
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,fp8,3,0.01266079992055893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,bfloat16,7,0.012006399780511856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,fp8,7,0.0126351997256279
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,bfloat16,15,0.012052799761295318
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,fp8,15,0.012624000012874604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,bfloat16,31,0.012017600238323212
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,fp8,31,0.01263359934091568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,bfloat16,63,0.011987199634313583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,fp8,63,0.012726399302482604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,bfloat16,127,0.012243200093507767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,fp8,127,0.012912000715732574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,bfloat16,255,0.013537600636482239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,fp8,255,0.014707200229167938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,bfloat16,511,0.01642719954252243
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,fp8,511,0.018254399299621582
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,bfloat16,1023,0.016700799763202667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,fp8,1023,0.01757279932498932
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,bfloat16,2047,0.018545599281787874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,fp8,2047,0.01809599995613098
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,bfloat16,1,0.012123200297355651
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,fp8,1,0.012716799974441528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,bfloat16,3,0.012054400146007537
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,fp8,3,0.012891200184822083
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,bfloat16,7,0.012057600170373916
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,fp8,7,0.012923200428485871
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,bfloat16,15,0.012049599736928939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,fp8,15,0.012932799756526947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,bfloat16,31,0.012062399834394454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,fp8,31,0.012838399410247803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,bfloat16,63,0.012097600102424621
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,fp8,63,0.01308320015668869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,bfloat16,127,0.01228799968957901
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,fp8,127,0.012895999848842621
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,bfloat16,255,0.013929599523544311
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,fp8,255,0.014776000380516052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,bfloat16,511,0.01695840060710907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,fp8,511,0.018329599499702455
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,bfloat16,1023,0.01808159947395325
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,fp8,1023,0.0176144003868103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,bfloat16,bfloat16,31,0.14411840438842774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,bfloat16,2047,0.020369599759578704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,bfloat16,fp8,2047,0.02126079946756363
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,bfloat16,1,0.01215519979596138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,fp8,1,0.012958399951457977
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,bfloat16,3,0.01207360029220581
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,fp8,3,0.012932799756526947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,bfloat16,7,0.012057600170373916
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,fp8,7,0.012950399518013
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,fp8,1,0.012756800651550293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,fp8,15,0.013023999333381654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,bfloat16,31,0.012196800112724305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,fp8,31,0.012878400087356568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,bfloat16,63,0.012036799639463424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,bfloat16,fp8,255,0.12125920057296753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,bfloat16,127,0.012291199713945388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,fp8,127,0.012880000472068786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,fp8,255,0.014867199957370758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,bfloat16,511,0.016964800655841827
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,fp8,511,0.018544000387191773
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,bfloat16,1023,0.020206399261951447
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,bfloat16,fp8,63,0.012535999715328216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,fp8,1023,0.02117439955472946
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,bfloat16,2047,0.037859201431274414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,fp8,2047,0.027569600939750673
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,bfloat16,bfloat16,1,0.06341760158538819
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,bfloat16,fp8,1,0.06601120233535766
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,bfloat16,bfloat16,3,0.06365119814872741
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,bfloat16,fp8,3,0.06634719967842102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,bfloat16,bfloat16,7,0.06466240286827088
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,bfloat16,fp8,7,0.06644480228424073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,bfloat16,bfloat16,1,0.011667200177907944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,bfloat16,fp8,15,0.06710079908370972
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,bfloat16,bfloat16,31,0.06614400148391723
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,bfloat16,fp8,31,0.06740319728851318
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,bfloat16,bfloat16,63,0.06859679818153382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,bfloat16,fp8,63,0.07002239823341369
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,bfloat16,bfloat16,1,0.09102079868316651
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,bfloat16,bfloat16,3,0.09070559740066528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,bfloat16,fp8,1,0.10064480304718018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,bfloat16,fp8,3,0.10081119537353515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,bfloat16,bfloat16,7,0.0912720024585724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,bfloat16,fp8,7,0.10083999633789062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,bfloat16,bfloat16,15,0.09131199717521668
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,bfloat16,fp8,15,0.10222400426864624
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,bfloat16,bfloat16,31,0.09310879707336425
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,bfloat16,fp8,31,0.10352319478988647
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,bfloat16,bfloat16,63,0.09704480171203614
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,bfloat16,fp8,63,0.10668319463729858
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,bfloat16,bfloat16,1,0.1473952054977417
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,bfloat16,fp8,1,0.1708224058151245
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,bfloat16,fp8,3,0.1720479965209961
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,bfloat16,bfloat16,7,0.1475152015686035
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,bfloat16,fp8,7,0.172052800655365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,bfloat16,bfloat16,15,0.1479375958442688
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,bfloat16,fp8,15,0.17336479425430298
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,bfloat16,bfloat16,31,0.1500223994255066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,bfloat16,fp8,31,0.17309119701385497
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,bfloat16,bfloat16,63,0.153985595703125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,bfloat16,15,0.012134400010108948
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,bfloat16,fp8,63,0.1763375997543335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,fp8,63,0.0130048006772995
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,bfloat16,bfloat16,1,0.2646575927734375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,bfloat16,bfloat16,255,0.013956800103187561
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,bfloat16,fp8,1,0.31515040397644045
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,bfloat16,bfloat16,3,0.26326398849487304
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,bfloat16,fp8,3,0.31331839561462405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,bfloat16,bfloat16,7,0.26464319229125977
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,bfloat16,bfloat16,15,0.26710560321807864
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,bfloat16,fp8,15,0.3129328012466431
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,bfloat16,bfloat16,31,0.26760480403900144
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,bfloat16,fp8,31,0.31396639347076416
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,bfloat16,bfloat16,15,0.06402559876441956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,bfloat16,bfloat16,63,0.2711328029632568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,bfloat16,bfloat16,1,0.11087039709091187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,bfloat16,fp8,1,0.11969599723815919
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,bfloat16,bfloat16,3,0.11211680173873902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,bfloat16,fp8,63,0.3175920009613037
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,bfloat16,bfloat16,7,0.11134719848632812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,bfloat16,fp8,7,0.1197808027267456
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,bfloat16,bfloat16,15,0.11275999546051026
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,bfloat16,fp8,15,0.12095199823379517
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,bfloat16,bfloat16,31,0.11348479986190796
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,bfloat16,fp8,31,0.12146079540252686
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,bfloat16,bfloat16,1,0.16415040493011473
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,bfloat16,fp8,1,0.18568320274353028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,bfloat16,bfloat16,3,0.1635696053504944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,bfloat16,fp8,3,0.18521759510040284
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,bfloat16,bfloat16,7,0.16485120058059693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,bfloat16,bfloat16,15,0.16437599658966065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,bfloat16,fp8,15,0.18664319515228273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,bfloat16,bfloat16,31,0.16705440282821654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,bfloat16,bfloat16,3,0.1477552056312561
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,bfloat16,fp8,31,0.1895311951637268
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,bfloat16,bfloat16,1,0.2730736017227173
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,bfloat16,fp8,1,0.3262063980102539
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,bfloat16,bfloat16,3,0.27674241065979005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,bfloat16,fp8,3,0.32457919120788575
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,bfloat16,bfloat16,7,0.274072003364563
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,bfloat16,fp8,7,0.32691519260406493
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,bfloat16,bfloat16,15,0.27856481075286865
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,bfloat16,fp8,15,0.32597599029541013
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,bfloat16,fp8,31,0.33062241077423093
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,bfloat16,bfloat16,1,0.5114816188812256
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,bfloat16,fp8,1,0.6044367790222168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,bfloat16,bfloat16,3,0.5105247974395752
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,bfloat16,fp8,7,0.3164351940155029
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,bfloat16,fp8,3,0.6116559982299805
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,bfloat16,bfloat16,7,0.5008272171020508
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,bfloat16,fp8,7,0.6105423927307129
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,bfloat16,1,0.01223199963569641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,bfloat16,bfloat16,15,0.5097360134124755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,fp8,1,0.012907199561595917
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,bfloat16,3,0.012008000165224075
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,fp8,3,0.013052800297737121
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,bfloat16,7,0.012031999975442886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,bfloat16,fp8,15,0.6101439952850342
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,bfloat16,bfloat16,31,0.5053135871887207
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,fp8,7,0.01292479932308197
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,bfloat16,15,0.012001600116491318
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,fp8,15,0.012919999659061432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,fp8,31,0.013070400059223174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,bfloat16,63,0.012039999663829803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,bfloat16,fp8,31,0.6127583980560303
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,fp8,63,0.01276479959487915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,bfloat16,127,0.01249919980764389
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,fp8,127,0.012740799784660339
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,fp8,255,0.01480800062417984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,bfloat16,511,0.017025600373744964
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,fp8,511,0.01849119961261749
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,bfloat16,1023,0.017744000256061553
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,fp8,1023,0.018795199692249298
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,bfloat16,fp8,7,0.18573759794235228
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,bfloat16,2047,0.019548800587654114
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,bfloat16,1,0.012174399942159653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,fp8,1,0.012772800028324127
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,bfloat16,3,0.012039999663829803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,fp8,3,0.012566399574279786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,fp8,7,0.012807999551296235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,bfloat16,15,0.012041600048542022
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,fp8,15,0.012465599924325943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,bfloat16,31,0.012027200311422348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,fp8,31,0.01247519999742508
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,bfloat16,63,0.012027200311422348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,fp8,63,0.012775999307632447
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,bfloat16,127,0.01225920021533966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,fp8,127,0.012771199643611907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,bfloat16,bfloat16,31,0.2789232015609741
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,bfloat16,255,0.014193600416183472
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,fp8,255,0.014375999569892883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,bfloat16,511,0.017158399522304534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,fp8,511,0.017951999604701997
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,bfloat16,1023,0.018479999899864197
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,fp8,1023,0.017905600368976593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,bfloat16,2047,0.020897600054740905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,fp8,2047,0.02141920030117035
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,bfloat16,1,0.01213119998574257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,fp8,1,0.01265760064125061
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,bfloat16,3,0.012145599722862244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,fp8,3,0.012681600451469422
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,bfloat16,7,0.012009599804878235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,fp8,7,0.01268640011548996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,bfloat16,15,0.012129600346088409
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,fp8,15,0.012724800407886505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,bfloat16,31,0.01220960021018982
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,fp8,31,0.012932799756526947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,bfloat16,63,0.012241599708795547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,fp8,63,0.012904000282287598
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,bfloat16,127,0.012467200309038163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,fp8,127,0.012918399274349212
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,bfloat16,255,0.014094400405883788
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,fp8,255,0.014678399264812469
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,bfloat16,511,0.01720480024814606
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,fp8,511,0.01838400065898895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,bfloat16,1023,0.02050720006227493
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,fp8,1023,0.021158400177955627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,bfloat16,fp8,3,0.1212831974029541
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,bfloat16,2047,0.03848640024662018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,bfloat16,fp8,2047,0.02759360074996948
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,bfloat16,1,0.01231200024485588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,bfloat16,3,0.012145599722862244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,fp8,3,0.013105599582195282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,bfloat16,7,0.01207360029220581
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,fp8,7,0.013167999684810638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,bfloat16,15,0.012065599858760833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,bfloat16,31,0.011827199906110763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,fp8,15,0.013193599879741669
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,fp8,31,0.012969599664211273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,bfloat16,63,0.012350399792194367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,bfloat16,255,0.014081600308418273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,bfloat16,31,0.01210239976644516
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,bfloat16,127,0.012439999729394913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,fp8,127,0.01319040060043335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,bfloat16,255,0.01401119977235794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,fp8,255,0.015091200172901154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,bfloat16,fp8,2047,0.019276799261569978
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,fp8,511,0.01839199960231781
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,bfloat16,1023,0.03670560121536255
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,bfloat16,bfloat16,7,0.012057600170373916
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,fp8,1023,0.026081600785255434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,bfloat16,2047,0.058739197254180905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,fp8,2047,0.04156320095062256
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,bfloat16,1,0.013227200508117676
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,fp8,1,0.013980799913406372
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,bfloat16,3,0.01324319988489151
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,bfloat16,7,0.013391999900341034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,fp8,7,0.013988800346851349
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,bfloat16,15,0.013222399353981017
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,fp8,15,0.01398719996213913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,bfloat16,31,0.013201600313186646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,fp8,31,0.014259199798107147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,bfloat16,63,0.013259199261665345
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,fp8,63,0.014177599549293518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,bfloat16,127,0.013814400136470794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,fp8,127,0.014044800400733947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,bfloat16,255,0.015223999321460725
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,fp8,255,0.015937599539756774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,bfloat16,511,0.018236799538135527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,fp8,511,0.019499200582504272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,bfloat16,1023,0.024140800535678863
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,fp8,1023,0.02361599951982498
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,bfloat16,2047,0.04233439862728119
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,fp8,2047,0.030935999751091004
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,bfloat16,4095,0.06413120031356812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,fp8,4095,0.04583199918270111
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,fp8,1,0.014217600226402283
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,fp8,3,0.01419679969549179
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,bfloat16,7,0.013339200615882873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,fp8,7,0.01408960074186325
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,bfloat16,15,0.013321599364280701
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,fp8,15,0.014056000113487243
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,bfloat16,31,0.013339200615882873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,fp8,31,0.01406240016222
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,bfloat16,63,0.013385599851608277
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,fp8,63,0.014153599739074707
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,fp8,1,0.01300320029258728
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,bfloat16,127,0.014071999490261078
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,fp8,127,0.014246399700641631
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,bfloat16,255,0.01574240028858185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,fp8,255,0.016139200329780577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,bfloat16,511,0.018980799615383147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,fp8,511,0.019708800315856933
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,fp8,63,0.012990400195121765
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,bfloat16,1023,0.03903680145740509
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,fp8,1023,0.02810400128364563
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,bfloat16,bfloat16,511,0.01793919950723648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,fp8,2047,0.04218400120735168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,bfloat16,1,0.017339199781417847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,bfloat16,4095,0.1060960054397583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,fp8,4095,0.06804159879684449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,fp8,1,0.018719999492168425
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,bfloat16,3,0.01729599982500076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,bfloat16,fp8,3,0.01432960033416748
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,bfloat16,7,0.01709599941968918
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,fp8,7,0.01900320053100586
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,fp8,15,0.018724800646305086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,bfloat16,15,0.017411200702190398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,bfloat16,31,0.017524799704551695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,fp8,31,0.018729600310325622
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,fp8,63,0.018907199800014495
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,bfloat16,127,0.01749439984560013
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,fp8,127,0.01886080056428909
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,bfloat16,255,0.021252800524234772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,fp8,255,0.022968000173568724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,fp8,511,0.030665600299835206
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,bfloat16,1,0.013342399895191193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,bfloat16,3,0.013473600149154663
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,fp8,1023,0.04714080095291138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,bfloat16,2047,0.10831040143966675
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,bfloat16,4095,0.19284000396728515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,fp8,4095,0.12049119472503662
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,bfloat16,1,0.02460159957408905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,fp8,1,0.0274944007396698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,bfloat16,3,0.024553599953651428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,fp8,3,0.027540799975395203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,bfloat16,7,0.02444159984588623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,fp8,7,0.02738400101661682
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,bfloat16,15,0.02447039932012558
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,fp8,15,0.027478399872779845
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,bfloat16,31,0.024460799992084503
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,fp8,31,0.027456000447273254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,bfloat16,63,0.024747200310230255
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,bfloat16,bfloat16,2047,0.060915201902389526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,fp8,63,0.027544000744819643
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,bfloat16,127,0.025760000944137572
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,bfloat16,255,0.04374560117721558
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,fp8,3,0.018931199610233308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,fp8,255,0.03612000048160553
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,bfloat16,511,0.06468319892883301
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,fp8,511,0.05278880000114441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,bfloat16,63,0.017419199645519256
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,bfloat16,1023,0.11035840511322022
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,fp8,1023,0.0792240023612976
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,bfloat16,2047,0.19576319456100463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,bfloat16,511,0.03856000006198883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,fp8,2047,0.12671680450439454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,bfloat16,1,0.0114656001329422
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,bfloat16,1023,0.06305919885635376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,bfloat16,3,0.011430399864912033
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,fp8,3,0.012428800016641617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,bfloat16,7,0.011499200016260147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,bfloat16,4095,0.36428160667419435
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,fp8,7,0.012428800016641617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,fp8,4095,0.22689919471740722
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,bfloat16,31,0.011593600362539291
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,bfloat16,fp8,2047,0.07178559899330139
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,fp8,15,0.012372799962759019
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,fp8,31,0.012406399846076966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,bfloat16,127,0.01154559999704361
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,fp8,63,0.012459199875593185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,fp8,127,0.0124719999730587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,bfloat16,255,0.013172799348831176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,fp8,255,0.014310400187969207
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,bfloat16,511,0.016204799711704253
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,fp8,511,0.01828320026397705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,bfloat16,1023,0.015892800688743592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,fp8,1023,0.017140799760818483
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,bfloat16,2047,0.01637600064277649
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,bfloat16,4095,0.017089599370956422
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,bfloat16,1,0.011519999802112579
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,fp8,1,0.01233920007944107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,bfloat16,3,0.011528000235557556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,fp8,3,0.012359999865293504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,bfloat16,7,0.01144160032272339
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,fp8,7,0.012342400103807449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,bfloat16,15,0.011448000371456147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,fp8,15,0.012385600060224534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,bfloat16,31,0.011539199948310852
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,fp8,31,0.01231359988451004
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,bfloat16,63,0.011598400026559829
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,fp8,63,0.012275200337171555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,bfloat16,127,0.011619199812412263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,fp8,127,0.01226240023970604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,bfloat16,255,0.013167999684810638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,fp8,255,0.014241600036621093
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,bfloat16,511,0.016072000563144683
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,fp8,511,0.018140800297260284
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,bfloat16,1023,0.015736000239849092
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,fp8,1023,0.016708800196647645
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,bfloat16,2047,0.015990400314331056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,fp8,2047,0.017206400632858276
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,bfloat16,4095,0.016704000532627106
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,bfloat16,fp8,4095,0.017849600315093993
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,bfloat16,1,0.011457599699497223
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,bfloat16,fp8,127,0.027556800842285158
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,fp8,1,0.012200000137090683
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,bfloat16,3,0.011691199988126755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,fp8,3,0.01218400001525879
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,bfloat16,7,0.011727999895811081
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,fp8,7,0.012216000258922577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,bfloat16,15,0.01170559972524643
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,fp8,15,0.012191999703645706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,bfloat16,31,0.011683200299739838
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,fp8,31,0.012328000366687774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,bfloat16,63,0.011664000153541566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,fp8,63,0.012331199645996094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,bfloat16,127,0.011715199798345566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,fp8,127,0.012353599816560746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,bfloat16,255,0.013382400572299957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,fp8,255,0.014776000380516052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,bfloat16,511,0.016383999586105348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,fp8,511,0.018145599961280824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,bfloat16,1023,0.016113600134849547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,fp8,1023,0.01701119989156723
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,bfloat16,2047,0.01626880019903183
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,fp8,1,0.01244800016283989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,fp8,2047,0.017131200432777403
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,fp8,4095,0.017924800515174866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,bfloat16,1,0.011537600308656693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,fp8,1,0.012432000041007996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,bfloat16,15,0.011468800157308579
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,bfloat16,3,0.011519999802112579
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,fp8,3,0.01242400035262108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,bfloat16,7,0.011358399689197541
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,fp8,7,0.012251199781894683
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,bfloat16,15,0.01157120019197464
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,bfloat16,63,0.011574400216341018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,fp8,15,0.01215839982032776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,bfloat16,31,0.011664000153541566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,fp8,31,0.012444800138473511
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,bfloat16,63,0.01207199990749359
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,fp8,63,0.012167999893426895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,fp8,127,0.012464000284671784
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,bfloat16,255,0.013273599743843078
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,bfloat16,511,0.01634719967842102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,fp8,511,0.018483200669288637
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,fp8,2047,0.017214399576187134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,bfloat16,1023,0.015863999724388123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,bfloat16,fp8,4095,0.017766399681568144
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,fp8,1023,0.017059199512004852
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,fp8,2047,0.017518399655818938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,bfloat16,2047,0.016344000399112702
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,bfloat16,4095,0.018324799835681915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,bfloat16,1,0.011747200042009354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,fp8,1,0.012324800342321396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,bfloat16,3,0.011430399864912033
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,fp8,3,0.012414400279521943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,bfloat16,7,0.011635199934244157
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,fp8,7,0.012307199835777282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,bfloat16,15,0.011660800129175187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,fp8,15,0.012328000366687774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,bfloat16,31,0.011513599753379821
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,fp8,31,0.012451200187206269
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,fp8,63,0.012270399928092956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,bfloat16,127,0.011766400188207626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,fp8,127,0.012347199767827988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,bfloat16,255,0.013327999413013459
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,fp8,255,0.014286400377750396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,bfloat16,511,0.016444799304008485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,fp8,511,0.01828639954328537
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,bfloat16,1023,0.01643040031194687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,bfloat16,2047,0.016846400499343873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,fp8,1023,0.017203199863433837
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,fp8,2047,0.017825600504875184
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,bfloat16,4095,0.017272000014781953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,fp8,4095,0.018275199830532073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,bfloat16,1,0.01141119971871376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,fp8,1,0.012457600235939026
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,bfloat16,3,0.01141439974308014
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,fp8,3,0.01242239996790886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,bfloat16,7,0.011407999694347382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,fp8,7,0.012430399656295776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,bfloat16,15,0.011526399850845337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,fp8,15,0.012427199631929398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,bfloat16,31,0.0115167997777462
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,fp8,31,0.012529599666595458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,bfloat16,63,0.01154239997267723
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,fp8,63,0.012455999851226807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,bfloat16,127,0.011587200313806533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,fp8,127,0.012460800260305405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,bfloat16,255,0.013195200264453888
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,fp8,255,0.014387199282646179
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,bfloat16,511,0.016227200627326965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,fp8,511,0.018262399733066557
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,bfloat16,1023,0.016092799603939056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,fp8,1023,0.01703680008649826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,bfloat16,2047,0.016371199488639833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,fp8,2047,0.01751199960708618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,bfloat16,4095,0.017342400550842286
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,bfloat16,fp8,4095,0.018025599420070648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,bfloat16,1,0.011751999706029892
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,bfloat16,bfloat16,4095,0.01717119961977005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,fp8,1,0.012457600235939026
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,bfloat16,3,0.01168000027537346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,fp8,3,0.012465599924325943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,bfloat16,7,0.01167680025100708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,fp8,7,0.012459199875593185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,bfloat16,15,0.011640000343322753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,fp8,15,0.012411200255155564
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,bfloat16,31,0.011641599982976914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,fp8,31,0.012404800206422806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,bfloat16,127,0.011641599982976914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,bfloat16,63,0.011670400202274323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,fp8,63,0.012428800016641617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,fp8,255,0.0141744002699852
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,fp8,127,0.012366399914026261
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,bfloat16,127,0.011689600348472596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,bfloat16,255,0.01343040019273758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,bfloat16,511,0.016451199352741242
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,bfloat16,1023,0.016176000237464905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,fp8,1023,0.0173567995429039
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,bfloat16,2047,0.01679680049419403
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,bfloat16,fp8,4095,0.0186271995306015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,fp8,2047,0.017591999471187593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,bfloat16,4095,0.01849440038204193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,bfloat16,1,0.011928000301122666
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,fp8,4095,0.018136000633239745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,fp8,1,0.012683199346065521
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,bfloat16,3,0.01194240003824234
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,bfloat16,7,0.011796800047159195
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,fp8,7,0.012593600153923034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,bfloat16,bfloat16,63,0.011628799885511399
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,bfloat16,15,0.011790399998426437
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,fp8,15,0.012513600289821625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,fp8,31,0.0126351997256279
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,bfloat16,63,0.01175519973039627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,fp8,63,0.012670400738716125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,bfloat16,127,0.011739200353622437
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,fp8,127,0.012668800354003907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,bfloat16,255,0.013710400462150574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,fp8,255,0.014587199687957764
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,bfloat16,511,0.016497600078582763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,fp8,511,0.018223999440670012
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,bfloat16,1023,0.016251200437545778
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,fp8,1023,0.017195199429988862
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,bfloat16,2047,0.01810079962015152
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,fp8,2047,0.017878399789333345
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,bfloat16,4095,0.021740800142288207
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,fp8,4095,0.021792000532150267
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,bfloat16,1,0.012559999525547028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,fp8,1,0.013532799482345582
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,bfloat16,3,0.012801599502563477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,fp8,3,0.013385599851608277
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,bfloat16,7,0.012615999579429627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,fp8,7,0.013665600121021271
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,bfloat16,15,0.012639999389648438
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,fp8,15,0.013606399297714233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,bfloat16,31,0.012676799297332763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,fp8,31,0.013444800674915314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,bfloat16,63,0.012726399302482604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,fp8,63,0.013841600716114044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,bfloat16,127,0.012904000282287598
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,fp8,127,0.013457599282264709
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,bfloat16,255,0.014974400401115417
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,fp8,255,0.015320000052452088
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,bfloat16,511,0.017643199861049653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,fp8,511,0.019276799261569978
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,bfloat16,1023,0.019670400023460387
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,fp8,1023,0.019227199256420135
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,bfloat16,2047,0.022649599611759184
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,fp8,2047,0.022571200132369997
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,bfloat16,4095,0.04143039882183075
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,bfloat16,1,0.01266079992055893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,fp8,1,0.013502399623394012
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,bfloat16,3,0.012595200538635254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,fp8,3,0.013502399623394012
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,bfloat16,7,0.012495999783277511
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,fp8,7,0.013518400490283966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,bfloat16,15,0.012665599584579468
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,fp8,15,0.013390399515628815
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,bfloat16,31,0.012671999633312225
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,fp8,31,0.01356000006198883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,bfloat16,63,0.012649600207805634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,fp8,63,0.013519999384880067
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,bfloat16,127,0.012801599502563477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,fp8,127,0.013513599336147309
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,bfloat16,255,0.014529600739479065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,fp8,255,0.015476800501346588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,bfloat16,511,0.017558400332927705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,fp8,511,0.01911199986934662
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,bfloat16,1023,0.02078399956226349
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,fp8,1023,0.022191999852657317
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,bfloat16,2047,0.03976480066776276
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,fp8,2047,0.02994079887866974
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,fp8,255,0.01419679969549179
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,bfloat16,4095,0.062041598558425906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,bfloat16,fp8,4095,0.044100800156593324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,bfloat16,1,0.012622399628162384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,fp8,1,0.013593600690364837
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,bfloat16,3,0.012641599774360657
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,fp8,3,0.01366720050573349
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,bfloat16,7,0.012736000120639801
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,fp8,7,0.013681599497795105
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,fp8,3,0.0126351997256279
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,bfloat16,15,0.01273760050535202
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,fp8,15,0.013644799590110779
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,fp8,31,0.013601599633693695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,bfloat16,31,0.013014400005340576
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,bfloat16,63,0.01282079964876175
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,bfloat16,bfloat16,31,0.01170559972524643
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,fp8,63,0.013580800592899322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,bfloat16,127,0.013100799918174744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,fp8,127,0.013716800510883332
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,bfloat16,255,0.014860799908638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,fp8,255,0.015433600544929505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,bfloat16,511,0.01846559941768646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,fp8,511,0.01920959949493408
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,bfloat16,1023,0.0370608001947403
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,bfloat16,2047,0.05958880186080932
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,bfloat16,4095,0.10387200117111206
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,fp8,4095,0.06651999950408935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,bfloat16,1,0.01655520051717758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,fp8,1,0.01810240000486374
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,bfloat16,3,0.016761599481105803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,fp8,3,0.01804800033569336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,bfloat16,7,0.016769599914550782
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,fp8,7,0.01815840005874634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,bfloat16,15,0.016784000396728515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,fp8,15,0.018084800243377684
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,bfloat16,31,0.016808000206947327
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,fp8,31,0.018067200481891633
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,bfloat16,63,0.016804799437522888
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,fp8,63,0.018063999712467194
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,bfloat16,127,0.017115199565887453
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,fp8,127,0.018030400574207305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,bfloat16,255,0.02096640020608902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,fp8,255,0.022035199403762817
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,fp8,511,0.0298224002122879
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,bfloat16,fp8,4095,0.029635199904441835
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,bfloat16,1023,0.060817599296569824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,fp8,1023,0.04572960138320923
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,fp8,2047,0.0696943998336792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,bfloat16,2047,0.10499520301818847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,bfloat16,4095,0.18899040222167968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,bfloat16,1,0.011748799681663513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,bfloat16,3,0.011747200042009354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,fp8,1,0.012495999783277511
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,fp8,3,0.012455999851226807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,bfloat16,7,0.011671999841928482
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,fp8,7,0.012556800246238708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,bfloat16,15,0.011692799627780914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,fp8,15,0.012614400684833526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,bfloat16,31,0.011689600348472596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,fp8,31,0.012564800679683685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,bfloat16,63,0.011718399822711945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,fp8,63,0.01252640038728714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,bfloat16,127,0.011683200299739838
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,fp8,127,0.012532800436019897
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,bfloat16,255,0.013252800703048706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,fp8,255,0.0144896000623703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,bfloat16,511,0.016212800145149232
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,fp8,511,0.01825760006904602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,bfloat16,1023,0.016340799629688263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,fp8,1023,0.01743679940700531
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,bfloat16,2047,0.01652960032224655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,fp8,2047,0.017822399735450745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,bfloat16,4095,0.017720000445842744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,bfloat16,fp8,4095,0.01828159987926483
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,bfloat16,1,0.01170239970088005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,fp8,1,0.012486399710178375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,bfloat16,3,0.011692799627780914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,fp8,3,0.01239679977297783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,bfloat16,7,0.011670400202274323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,fp8,7,0.012406399846076966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,bfloat16,15,0.011697600036859513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,fp8,15,0.01241919994354248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,bfloat16,31,0.01165120005607605
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,fp8,31,0.012417600303888322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,bfloat16,63,0.011648000031709672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,fp8,1023,0.026761600375175477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,fp8,63,0.012483199685811996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,bfloat16,127,0.011664000153541566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,bfloat16,fp8,2047,0.04167360067367554
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,fp8,127,0.012380799651145935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,bfloat16,511,0.01652960032224655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,fp8,511,0.01794400066137314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,bfloat16,1023,0.016164800524711607
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,fp8,1023,0.017132799327373504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,bfloat16,2047,0.016838400065898894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,fp8,2047,0.01749120056629181
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,bfloat16,4095,0.01889120042324066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,fp8,4095,0.018328000605106354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,bfloat16,1,0.011912000179290772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,fp8,1,0.012608000636100769
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,bfloat16,3,0.011934400349855424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,fp8,3,0.012615999579429627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,bfloat16,7,0.01175680011510849
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,fp8,7,0.012388800084590913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,bfloat16,15,0.01173280030488968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,bfloat16,511,0.03730559945106506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,bfloat16,31,0.011827199906110763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,fp8,31,0.012564800679683685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,bfloat16,63,0.011830399930477142
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,fp8,63,0.012646399438381195
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,bfloat16,127,0.011952000111341477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,fp8,127,0.012652799487113953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,bfloat16,fp8,4095,0.12025920152664185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,bfloat16,255,0.013600000739097595
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,bfloat16,511,0.016575999557971954
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,fp8,255,0.014315199851989747
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,fp8,511,0.01846559941768646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,bfloat16,1023,0.01621599942445755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,fp8,1023,0.017215999960899352
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,bfloat16,2047,0.018024000525474548
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,fp8,2047,0.017713600397109987
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,bfloat16,4095,0.021583999693393707
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,fp8,4095,0.021342399716377258
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,bfloat16,1,0.0118367999792099
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,bfloat16,fp8,511,0.01788640022277832
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,fp8,1,0.012835200130939483
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,bfloat16,3,0.011710400134325028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,fp8,3,0.01271039992570877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,bfloat16,7,0.012041600048542022
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,fp8,7,0.012559999525547028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,bfloat16,15,0.011872000247240066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,fp8,15,0.012932799756526947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,bfloat16,31,0.01196800023317337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,fp8,31,0.01268479973077774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,bfloat16,63,0.011947199702262878
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,bfloat16,127,0.012038400024175644
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,fp8,127,0.013025599718093871
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,bfloat16,255,0.013716800510883332
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,fp8,255,0.014715200662612915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,bfloat16,511,0.016843199729919434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,fp8,511,0.018457600474357606
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,bfloat16,1023,0.01786080002784729
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,fp8,1023,0.017632000148296356
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,bfloat16,2047,0.020552000403404234
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,fp8,2047,0.021380800008773803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,bfloat16,4095,0.0372655987739563
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,fp8,4095,0.027537599205970764
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,bfloat16,1,0.015316799283027649
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,fp8,255,0.014299200475215912
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,fp8,1,0.015748800337314607
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,bfloat16,3,0.015361599624156952
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,bfloat16,7,0.01493919938802719
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,fp8,3,0.015836800634860992
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,fp8,7,0.01603199988603592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,bfloat16,15,0.015289600193500518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,fp8,15,0.01603520065546036
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,bfloat16,31,0.01517920047044754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,fp8,31,0.016040000319480895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,bfloat16,63,0.015024000406265258
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,fp8,63,0.016041600704193117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,bfloat16,127,0.01600639969110489
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,fp8,127,0.015913599729537965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,bfloat16,255,0.01738400012254715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,fp8,255,0.018054400384426118
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,bfloat16,511,0.021784000098705292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,fp8,511,0.02155359983444214
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,bfloat16,1023,0.04097599983215332
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,fp8,1023,0.03124319911003113
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,fp8,2047,0.04412319958209991
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,bfloat16,bfloat16,2047,0.06307680010795594
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,fp8,1,0.020214399695396422
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,bfloat16,3,0.018727999925613404
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,bfloat16,7,0.018745599687099455
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,fp8,3,0.020147199928760528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,bfloat16,fp8,15,0.012406399846076966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,bfloat16,15,0.01849599927663803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,fp8,15,0.02030719965696335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,bfloat16,31,0.018310399353504182
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,fp8,31,0.02018879950046539
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,fp8,63,0.019964799284934998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,bfloat16,127,0.018747200071811677
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,fp8,127,0.01996160000562668
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,bfloat16,255,0.023632000386714935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,fp8,255,0.023841600120067596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,bfloat16,511,0.041310399770736694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,fp8,511,0.03235679864883423
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,bfloat16,1023,0.06627519726753235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,fp8,1023,0.04860160052776337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,fp8,2047,0.07326719760894776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,bfloat16,2047,0.1097872018814087
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,bfloat16,1,0.025931200385093688
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,fp8,1,0.02911359965801239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,bfloat16,3,0.02593280076980591
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,fp8,3,0.029182401299476624
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,bfloat16,7,0.025947201251983642
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,fp8,7,0.02911840081214905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,bfloat16,15,0.025913599133491515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,fp8,15,0.02905600070953369
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,bfloat16,31,0.025905600190162657
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,bfloat16,fp8,63,0.012740799784660339
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,fp8,31,0.029099199175834655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,bfloat16,63,0.025995200872421263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,fp8,63,0.029396799206733704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,bfloat16,127,0.02797600030899048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,fp8,127,0.029491201043128967
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,bfloat16,255,0.04616000056266785
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,fp8,255,0.03743839859962463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,bfloat16,511,0.06784960031509399
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,fp8,511,0.05479360222816467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,bfloat16,1023,0.1129472017288208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,bfloat16,bfloat16,255,0.013495999574661254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,fp8,1023,0.08218719959259033
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,bfloat16,1,0.040838399529457094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,fp8,1,0.047116801142692566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,fp8,2047,0.12981120347976685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,bfloat16,bfloat16,2047,0.19900799989700318
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,bfloat16,3,0.040828800201416014
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,fp8,3,0.04711039960384369
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,bfloat16,7,0.04093280136585235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,bfloat16,15,0.04093759953975677
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,fp8,7,0.04647200107574463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,fp8,15,0.04726240038871765
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,bfloat16,31,0.0408048003911972
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,fp8,31,0.0469648003578186
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,bfloat16,63,0.043038401007652285
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,fp8,63,0.047310400009155276
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,bfloat16,127,0.05013759732246399
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,fp8,127,0.04917120039463043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,bfloat16,255,0.07525119781494141
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,fp8,255,0.0677727997303009
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,bfloat16,511,0.117576003074646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,fp8,511,0.09633439779281616
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,fp8,7,0.020398400723934174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,bfloat16,1,0.021011200547218323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,bfloat16,1023,0.2044528007507324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,fp8,1,0.02266400009393692
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,fp8,3,0.022614400088787078
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,fp8,2047,0.2429487943649292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,bfloat16,7,0.020921599864959717
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,bfloat16,15,0.020924800634384157
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,fp8,7,0.02335200011730194
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,fp8,15,0.022622400522232057
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,bfloat16,31,0.02099359929561615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,fp8,31,0.02265920042991638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,fp8,63,0.02266719937324524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,bfloat16,127,0.021478399634361267
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,fp8,127,0.022886399924755097
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,bfloat16,255,0.02940320074558258
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,fp8,255,0.026545599102973938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,bfloat16,511,0.04514240026473999
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,fp8,511,0.03558880090713501
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,bfloat16,1,0.02826719880104065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,fp8,1,0.03128319978713989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,bfloat16,3,0.02812800109386444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,fp8,3,0.03127520084381104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,bfloat16,7,0.028164801001548768
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,fp8,7,0.03127839863300323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,bfloat16,15,0.028299200534820556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,fp8,15,0.03142560124397278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,bfloat16,31,0.028369599580764772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,fp8,31,0.03152639865875244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,bfloat16,63,0.02841919958591461
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,fp8,63,0.03133760094642639
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,bfloat16,1,0.018739199638366698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,bfloat16,127,0.033055999875068666
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,fp8,127,0.03145439922809601
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,bfloat16,255,0.049563199281692505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,fp8,255,0.04069119989871979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,bfloat16,511,0.0718384027481079
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,bfloat16,bfloat16,63,0.018796800076961516
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,fp8,1023,0.14721920490264892
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,bfloat16,fp8,511,0.05725600123405457
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,bfloat16,1,0.04299359917640686
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,fp8,1,0.048851200938224794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,bfloat16,3,0.020975999534130096
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,bfloat16,3,0.04323680102825165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,fp8,3,0.04909600019454956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,fp8,7,0.04883840084075928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,bfloat16,bfloat16,2047,0.3743551969528198
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,bfloat16,15,0.043017598986625674
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,fp8,15,0.049358400702476504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,bfloat16,31,0.043198400735855104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,fp8,31,0.04889279901981354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,bfloat16,127,0.05424799919128418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,fp8,63,0.049323201179504395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,bfloat16,63,0.0471343994140625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,bfloat16,255,0.07557600140571594
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,bfloat16,bfloat16,63,0.02096800059080124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,fp8,255,0.06978240013122558
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,fp8,511,0.09819520115852357
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,bfloat16,1,0.07062559723854064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,fp8,1,0.084443199634552
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,bfloat16,3,0.0715279996395111
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,bfloat16,7,0.07079520225524902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,fp8,7,0.08451039791107177
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,fp8,3,0.08408640027046203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,fp8,15,0.08401920199394226
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,fp8,31,0.08406559824943542
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,bfloat16,63,0.07940160036087036
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,fp8,63,0.08891040086746216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,bfloat16,127,0.0886784017086029
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,fp8,127,0.09816799759864807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,bfloat16,255,0.132534396648407
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,fp8,255,0.12052320241928101
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,bfloat16,511,0.21899039745330812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,fp8,511,0.17926880121231079
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,bfloat16,1,0.03438400030136109
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,fp8,1,0.03743999898433685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,bfloat16,3,0.03445279896259308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,fp8,3,0.037574398517608645
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,bfloat16,7,0.03452480137348175
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,fp8,7,0.03768959939479828
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,bfloat16,15,0.03456799983978272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,fp8,15,0.0377487987279892
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,bfloat16,31,0.0348800003528595
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,fp8,31,0.03778879940509796
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,bfloat16,63,0.035622400045394895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,fp8,63,0.037673598527908324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,bfloat16,127,0.04320479929447174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,fp8,127,0.038227200508117676
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,bfloat16,255,0.0559440016746521
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,bfloat16,7,0.04307680130004883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,bfloat16,1,0.04925119876861572
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,fp8,1,0.054523199796676636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,bfloat16,3,0.04919039905071258
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,fp8,3,0.05467519760131836
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,fp8,127,0.05323839783668518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,bfloat16,7,0.04920159876346588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,fp8,7,0.0545632004737854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,fp8,15,0.05514079928398132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,bfloat16,31,0.04869759976863861
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,bfloat16,bfloat16,511,0.11924480199813843
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,fp8,31,0.05531200170516968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,bfloat16,63,0.0548799991607666
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,fp8,63,0.05524320006370544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,bfloat16,15,0.0718720018863678
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,bfloat16,127,0.06053280234336853
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,fp8,127,0.06071839928627014
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,bfloat16,bfloat16,31,0.072598397731781
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,bfloat16,255,0.08297759890556336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,bfloat16,1,0.07782400250434876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,fp8,1,0.08928800225257874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,bfloat16,3,0.07748799920082092
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,fp8,3,0.08934400081634522
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,bfloat16,7,0.07780640125274658
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,fp8,7,0.08961920142173767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,bfloat16,15,0.0772816002368927
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,fp8,15,0.08937919735908509
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,bfloat16,31,0.08016639947891235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,fp8,31,0.08986719846725463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,fp8,63,0.09528480172157287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,bfloat16,127,0.08961920142173767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,fp8,127,0.10057439804077148
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,bfloat16,255,0.13244960308074952
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,fp8,255,0.1270624041557312
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,bfloat16,1,0.12975360155105592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,bfloat16,3,0.13187520503997802
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,fp8,1,0.15664160251617432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,fp8,3,0.15670080184936525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,bfloat16,7,0.13182879686355592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,fp8,7,0.1564751982688904
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,bfloat16,15,0.13033759593963623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,fp8,15,0.15852960348129272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,bfloat16,31,0.14106719493865966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,fp8,31,0.15705920457839967
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,bfloat16,63,0.14425920248031615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,fp8,63,0.16951359510421754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,bfloat16,127,0.15718239545822144
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,fp8,127,0.1841279983520508
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,bfloat16,1,0.011761599779129028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,bfloat16,255,0.23640639781951905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,bfloat16,fp8,255,0.22347359657287597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,fp8,1,0.012539200484752655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,bfloat16,3,0.011771199852228164
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,fp8,3,0.012630400061607362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,bfloat16,fp8,255,0.048107200860977174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,fp8,7,0.012671999633312225
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,bfloat16,15,0.011905600130558015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,fp8,15,0.012520000338554382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,fp8,31,0.01271039992570877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,bfloat16,63,0.011881600320339202
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,fp8,63,0.012654399871826172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,bfloat16,127,0.011975999921560287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,fp8,127,0.012615999579429627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,bfloat16,255,0.013500800728797913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,fp8,255,0.014636799693107605
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,bfloat16,511,0.016441600024700166
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,fp8,511,0.018089599907398224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,bfloat16,15,0.04891520142555237
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,bfloat16,1023,0.016518400609493257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,fp8,1023,0.01744319945573807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,bfloat16,2047,0.017321600019931792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,bfloat16,4095,0.01897920072078705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,fp8,4095,0.01919199973344803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,bfloat16,1,0.011999999731779098
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,fp8,1,0.012987199425697326
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,bfloat16,3,0.012164799869060517
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,fp8,3,0.01263359934091568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,bfloat16,7,0.011990399658679962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,bfloat16,fp8,255,0.0745311975479126
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,fp8,7,0.012678399682044983
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,bfloat16,15,0.012088000029325485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,fp8,15,0.012830400466918945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,bfloat16,31,0.012121599912643433
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,fp8,31,0.012857599556446076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,bfloat16,63,0.01199359968304634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,fp8,63,0.012831999361515046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,bfloat16,127,0.011979199945926666
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,fp8,127,0.012571200728416443
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,bfloat16,bfloat16,63,0.08406720161437989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,bfloat16,255,0.01372320055961609
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,bfloat16,511,0.016495999693870545
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,fp8,511,0.018380799889564516
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,bfloat16,1023,0.016648000478744505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,fp8,1023,0.01722240000963211
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,bfloat16,2047,0.018185600638389587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,fp8,2047,0.01804800033569336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,bfloat16,4095,0.022435200214385987
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,fp8,4095,0.02142080068588257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,bfloat16,1,0.012145599722862244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,fp8,1,0.012724800407886505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,bfloat16,3,0.01199520006775856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,fp8,3,0.01284160017967224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,bfloat16,7,0.011876799911260606
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,fp8,7,0.012750400602817536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,bfloat16,15,0.012080000340938568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,fp8,15,0.012736000120639801
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,bfloat16,31,0.012081599980592727
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,fp8,31,0.012755200266838074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,bfloat16,63,0.011827199906110763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,fp8,63,0.012947200238704682
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,bfloat16,127,0.012345600128173827
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,fp8,127,0.012604799866676331
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,bfloat16,255,0.013860799372196198
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,fp8,255,0.0146479994058609
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,bfloat16,511,0.016599999368190767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,fp8,511,0.018452799320220946
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,bfloat16,1023,0.018110400438308714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,fp8,1023,0.017526400089263917
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,bfloat16,2047,0.0204927995800972
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,fp8,2047,0.021347199380397797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,bfloat16,4095,0.03752799928188324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,bfloat16,fp8,4095,0.028009599447250365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,bfloat16,1,0.012172800302505494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,fp8,1,0.01292639970779419
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,fp8,3,0.012851199507713318
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,bfloat16,7,0.012089599668979645
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,bfloat16,15,0.011985599994659424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,fp8,15,0.012775999307632447
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,bfloat16,31,0.012006399780511856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,fp8,31,0.01273760050535202
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,bfloat16,7,0.011812800168991089
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,bfloat16,63,0.011982399970293045
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,fp8,63,0.012932799756526947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,bfloat16,127,0.01204639971256256
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,bfloat16,31,0.011817599833011627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,fp8,127,0.012908799946308136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,bfloat16,255,0.013873599469661713
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,fp8,255,0.01488959938287735
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,bfloat16,511,0.017022399604320525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,fp8,511,0.018361599743366243
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,bfloat16,1023,0.020395199954509734
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,fp8,1023,0.021044799685478212
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,bfloat16,2047,0.03722560107707977
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,fp8,2047,0.027327999472618103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,bfloat16,4095,0.059412801265716554
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,fp8,4095,0.04263199865818024
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,bfloat16,bfloat16,1,0.05607680082321167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,bfloat16,fp8,2047,0.017817600071430205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,bfloat16,fp8,1,0.061990398168563846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,bfloat16,bfloat16,3,0.056884801387786864
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,bfloat16,fp8,3,0.061806398630142215
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,bfloat16,fp8,7,0.06204000115394592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,bfloat16,bfloat16,15,0.0578607976436615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,bfloat16,fp8,15,0.06293759942054748
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,bfloat16,bfloat16,31,0.06080319881439209
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,bfloat16,fp8,31,0.06277920007705688
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,bfloat16,bfloat16,63,0.0648256003856659
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,bfloat16,fp8,63,0.06693440079689025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,bfloat16,fp8,127,0.06955680251121521
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,bfloat16,bfloat16,127,0.06964319944381714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,bfloat16,bfloat16,1,0.08454560041427613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,bfloat16,fp8,1,0.09541760087013244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,bfloat16,fp8,255,0.014519999921321868
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,bfloat16,fp8,3,0.09595999717712403
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,bfloat16,bfloat16,7,0.08506399989128113
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,bfloat16,bfloat16,15,0.0869264006614685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,bfloat16,fp8,7,0.09530240297317505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,bfloat16,fp8,15,0.09631040096282958
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,bfloat16,bfloat16,31,0.08918079733848572
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,bfloat16,fp8,31,0.09894239902496338
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,bfloat16,bfloat16,63,0.09354559779167175
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,bfloat16,fp8,63,0.10275839567184449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,bfloat16,bfloat16,127,0.1013983964920044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,bfloat16,fp8,127,0.10588480234146118
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,bfloat16,bfloat16,1,0.14128320217132567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,bfloat16,fp8,1,0.16285120248794555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,bfloat16,bfloat16,3,0.14179359674453734
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,bfloat16,fp8,3,0.16355520486831665
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,bfloat16,bfloat16,7,0.14342399835586547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,bfloat16,fp8,7,0.16387840509414672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,bfloat16,bfloat16,15,0.1449887990951538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,bfloat16,fp8,15,0.16562880277633668
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,fp8,7,0.012835200130939483
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,bfloat16,bfloat16,31,0.14622559547424316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,bfloat16,fp8,31,0.17157440185546874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,bfloat16,bfloat16,63,0.15043679475784302
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,bfloat16,fp8,63,0.17308160066604614
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,bfloat16,bfloat16,127,0.16123839616775512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,bfloat16,fp8,127,0.1793887972831726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,bfloat16,bfloat16,1,0.2580032110214233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,bfloat16,bfloat16,3,0.26054720878601073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,bfloat16,fp8,1,0.2958479881286621
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,bfloat16,fp8,3,0.29328320026397703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,bfloat16,bfloat16,7,0.2622175931930542
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,bfloat16,fp8,7,0.2951488018035889
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,bfloat16,bfloat16,7,0.05700960159301758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,bfloat16,bfloat16,15,0.26033759117126465
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,bfloat16,bfloat16,31,0.26722719669342043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,bfloat16,fp8,15,0.308950400352478
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,bfloat16,fp8,31,0.31616480350494386
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,bfloat16,bfloat16,63,0.2654063940048218
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,bfloat16,fp8,63,0.3150831937789917
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,bfloat16,bfloat16,1,0.10571520328521729
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,bfloat16,fp8,1,0.11624000072479249
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,bfloat16,bfloat16,3,0.08494880199432372
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,bfloat16,bfloat16,3,0.10590399503707885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,bfloat16,fp8,3,0.11605759859085082
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,bfloat16,fp8,127,0.3428368091583252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,bfloat16,bfloat16,7,0.10590560436248779
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,bfloat16,bfloat16,15,0.10561920404434204
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,bfloat16,fp8,7,0.11589920520782471
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,bfloat16,bfloat16,31,0.10726720094680786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,bfloat16,fp8,31,0.11787999868392944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,bfloat16,bfloat16,63,0.11134079694747925
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,bfloat16,fp8,63,0.12024320363998413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,bfloat16,bfloat16,1,0.15930399894714356
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,bfloat16,fp8,1,0.18309439420700074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,bfloat16,bfloat16,3,0.15829600095748902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,bfloat16,bfloat16,7,0.15850080251693727
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,bfloat16,fp8,3,0.18279680013656616
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,bfloat16,bfloat16,15,0.15855200290679933
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,bfloat16,fp8,7,0.18284000158309938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,bfloat16,fp8,15,0.18342080116271972
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,bfloat16,bfloat16,31,0.16101280450820923
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,bfloat16,fp8,31,0.1849071979522705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,bfloat16,bfloat16,63,0.16665120124816896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,bfloat16,bfloat16,3,0.012068799883127212
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,bfloat16,fp8,63,0.18778560161590577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,bfloat16,bfloat16,1,0.2701744079589844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,bfloat16,fp8,1,0.31738719940185545
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,bfloat16,bfloat16,3,0.26963999271392824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,bfloat16,bfloat16,7,0.27019200325012205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,bfloat16,fp8,3,0.32273120880126954
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,bfloat16,fp8,7,0.32069759368896483
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,bfloat16,bfloat16,15,0.26826560497283936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,bfloat16,fp8,15,0.3221791982650757
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,bfloat16,fp8,31,0.32632319927215575
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,bfloat16,bfloat16,63,0.278603196144104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,bfloat16,fp8,63,0.32550559043884275
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,bfloat16,bfloat16,1,0.49799361228942873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,bfloat16,bfloat16,3,0.4980207920074463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,bfloat16,fp8,1,0.6116208076477051
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,bfloat16,fp8,3,0.5923823833465576
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,bfloat16,bfloat16,7,0.496779203414917
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,bfloat16,fp8,7,0.6052271842956543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,bfloat16,bfloat16,15,0.5044911861419678
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,bfloat16,bfloat16,127,0.28849599361419676
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,bfloat16,fp8,15,0.5933680057525634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,bfloat16,bfloat16,31,0.5019008159637451
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,bfloat16,fp8,15,0.11700960397720336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,bfloat16,1,0.011611200124025344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,fp8,1,0.012468799948692322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,bfloat16,3,0.012127999961376191
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,fp8,3,0.012451200187206269
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,bfloat16,7,0.011608000099658965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,fp8,7,0.012905600666999816
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,bfloat16,15,0.01178399994969368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,bfloat16,bfloat16,63,0.5130864143371582
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,bfloat16,31,0.012113600224256515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,bfloat16,fp8,31,0.6031904220581055
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,fp8,31,0.012439999729394913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,bfloat16,fp8,63,0.6073743820190429
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,fp8,63,0.013041600584983826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,bfloat16,63,0.012006399780511856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,fp8,127,0.012627199292182922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,bfloat16,127,0.011905600130558015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,bfloat16,511,0.016758400201797485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,bfloat16,255,0.013990400731563568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,fp8,511,0.018328000605106354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,fp8,255,0.014735999703407287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,fp8,1023,0.017665599286556245
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,bfloat16,2047,0.019062399864196777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,fp8,2047,0.01855839937925339
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,bfloat16,1,0.011972799897193909
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,bfloat16,4095,0.023384000360965728
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,fp8,4095,0.02213599979877472
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,fp8,1,0.013064000010490417
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,bfloat16,3,0.012107200175523757
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,fp8,3,0.01276639997959137
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,bfloat16,7,0.01210559979081154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,bfloat16,15,0.012404800206422806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,fp8,7,0.012652799487113953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,bfloat16,31,0.012011200189590454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,fp8,31,0.013059200346469879
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,bfloat16,63,0.012151999771595002
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,fp8,63,0.012614400684833526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,fp8,127,0.012958399951457977
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,bfloat16,255,0.013736000657081604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,fp8,255,0.014601600170135499
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,bfloat16,511,0.016700799763202667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,fp8,511,0.01847040057182312
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,bfloat16,1023,0.017492799460887908
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,fp8,1023,0.017627200484275816
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,bfloat16,2047,0.020190399885177613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,fp8,2047,0.02131839990615845
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,bfloat16,4095,0.03826879858970642
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,fp8,4095,0.027265599370002745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,bfloat16,bfloat16,31,0.2733567953109741
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,bfloat16,1,0.011761599779129028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,fp8,1,0.01276639997959137
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,fp8,3,0.01257600039243698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,bfloat16,7,0.012011200189590454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,fp8,7,0.012559999525547028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,bfloat16,15,0.012135999649763108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,fp8,15,0.012574400007724761
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,bfloat16,31,0.01218079999089241
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,fp8,31,0.01300159990787506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,bfloat16,63,0.012191999703645706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,fp8,63,0.012455999851226807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,bfloat16,127,0.01236959993839264
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,fp8,127,0.012415999919176102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,bfloat16,255,0.014019200205802917
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,fp8,255,0.014336000382900237
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,bfloat16,511,0.01706400066614151
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,fp8,511,0.017951999604701997
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,bfloat16,1023,0.019819200038909912
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,fp8,1023,0.020436799526214598
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,bfloat16,2047,0.03821600079536438
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,fp8,2047,0.02656320035457611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,bfloat16,4095,0.06066399812698364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,fp8,4095,0.041536000370979306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,bfloat16,1,0.01212640032172203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,fp8,1,0.012596799433231354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,fp8,3,0.012483199685811996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,bfloat16,7,0.01170400008559227
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,fp8,7,0.01257600039243698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,bfloat16,15,0.011817599833011627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,fp8,15,0.012641599774360657
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,bfloat16,31,0.011801599711179733
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,fp8,31,0.012604799866676331
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,bfloat16,63,0.011819200217723846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,fp8,63,0.012483199685811996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,bfloat16,1023,0.01671680063009262
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,bfloat16,127,0.01199359968304634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,fp8,127,0.012646399438381195
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,bfloat16,255,0.013752000033855438
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,fp8,255,0.01451520025730133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,bfloat16,511,0.01709599941968918
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,fp8,15,0.012860800325870513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,fp8,511,0.01809120029211044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,bfloat16,1023,0.036073601245880126
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,fp8,1023,0.02677280008792877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,bfloat16,bfloat16,127,0.012399999797344208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,bfloat16,2047,0.05831840038299561
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,fp8,2047,0.04076800048351288
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,bfloat16,1,0.013340799510478974
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,bfloat16,4095,0.10169919729232788
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,fp8,4095,0.06716319918632507
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,bfloat16,3,0.013545599579811097
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,fp8,3,0.014135999977588654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,fp8,7,0.014398400485515595
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,bfloat16,15,0.013278399407863618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,fp8,15,0.01440960019826889
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,bfloat16,31,0.01327040046453476
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,fp8,31,0.013976000249385834
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,bfloat16,63,0.013344000279903411
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,fp8,63,0.014158399403095245
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,bfloat16,127,0.013523200154304504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,bfloat16,bfloat16,3,0.012142399698495865
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,fp8,127,0.01387999951839447
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,bfloat16,255,0.01528480052947998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,fp8,255,0.016073599457740784
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,fp8,511,0.0195375993847847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,bfloat16,1023,0.022308799624443054
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,fp8,1023,0.022603200376033784
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,bfloat16,2047,0.040012800693511964
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,fp8,2047,0.029120001196861266
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,bfloat16,4095,0.062224000692367554
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,fp8,4095,0.04368480145931244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,fp8,8191,0.06886240243911743
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,bfloat16,8191,0.10634880065917969
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,bfloat16,1,0.013214400410652161
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,fp8,1,0.014475199580192565
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,bfloat16,3,0.01329759955406189
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,fp8,3,0.014449599385261535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,bfloat16,7,0.013324800133705138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,fp8,7,0.014457599818706512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,bfloat16,15,0.01334560066461563
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,fp8,15,0.014215999841690063
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,bfloat16,31,0.013380800187587739
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,fp8,31,0.014420799911022186
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,bfloat16,63,0.013569599390029908
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,fp8,63,0.014427199959754944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,bfloat16,127,0.013740800321102142
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,fp8,127,0.014414399862289429
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,bfloat16,fp8,15,0.012380799651145935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,bfloat16,255,0.015662400424480437
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,fp8,255,0.01645279973745346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,bfloat16,511,0.018855999410152435
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,fp8,511,0.019908800721168518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,bfloat16,1023,0.0383296012878418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,bfloat16,2047,0.05965759754180908
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,fp8,2047,0.04239999949932098
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,bfloat16,4095,0.10541119575500488
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,fp8,4095,0.06718720197677612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,fp8,8191,0.11887359619140625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,bfloat16,8191,0.19060800075531006
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,fp8,1,0.014262400567531586
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,bfloat16,1,0.017280000448226928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,fp8,1,0.01873279958963394
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,fp8,3,0.01894879937171936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,bfloat16,7,0.013395200669765472
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,bfloat16,7,0.017195199429988862
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,fp8,7,0.018795199692249298
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,bfloat16,15,0.017246399819850922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,bfloat16,31,0.01743679940700531
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,fp8,31,0.018780800700187682
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,bfloat16,63,0.01748320013284683
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,fp8,63,0.018795199692249298
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,bfloat16,127,0.017531199753284453
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,fp8,127,0.018883199989795686
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,bfloat16,255,0.021347199380397797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,bfloat16,bfloat16,511,0.01839359998703003
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,fp8,255,0.02285439968109131
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,bfloat16,511,0.03814719915390015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,fp8,511,0.031079998612403868
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,bfloat16,1023,0.0633791983127594
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,fp8,1023,0.04689919948577881
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,fp8,2047,0.07099199891090394
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,bfloat16,4095,0.19078559875488282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,fp8,4095,0.12058559656143189
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,fp8,8191,0.22048640251159668
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,bfloat16,bfloat16,3,0.012280000001192093
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,bfloat16,8191,0.3621599912643433
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,fp8,1,0.02757439911365509
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,bfloat16,1,0.02505599856376648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,bfloat16,3,0.0247296005487442
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,fp8,3,0.027563199400901794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,bfloat16,7,0.02465119957923889
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,fp8,7,0.027558401226997375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,fp8,15,0.027694401144981385
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,bfloat16,31,0.02515999972820282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,fp8,31,0.027958399057388304
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,bfloat16,63,0.024873599410057068
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,fp8,63,0.028003200888633728
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,bfloat16,127,0.02563680112361908
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,fp8,127,0.027667200565338133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,bfloat16,fp8,1023,0.02709279954433441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,bfloat16,255,0.043222400546073916
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,fp8,255,0.03600800037384033
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,fp8,511,0.05276640057563782
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,bfloat16,511,0.06303200125694275
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,bfloat16,1023,0.1093567967414856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,fp8,1023,0.07864800095558167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,fp8,2047,0.12573280334472656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,bfloat16,2047,0.1940608024597168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,bfloat16,3,0.017371200025081635
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,bfloat16,4095,0.36354238986968995
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,fp8,4095,0.22456319332122804
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,fp8,15,0.01873439997434616
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,bfloat16,1,0.01141439974308014
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,bfloat16,3,0.01156800016760826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,fp8,1,0.012358400225639343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,fp8,3,0.012243200093507767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,bfloat16,7,0.011575999855995178
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,fp8,7,0.01231200024485588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,bfloat16,15,0.011670400202274323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,fp8,15,0.01242239996790886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,bfloat16,31,0.011479999870061874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,fp8,31,0.012515200674533844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,bfloat16,63,0.011694400012493134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,fp8,63,0.012342400103807449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,bfloat16,8191,0.7069952011108398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,bfloat16,127,0.011716800183057785
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,fp8,127,0.01234079971909523
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,bfloat16,255,0.013020800054073333
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,fp8,8191,0.4250016212463379
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,bfloat16,511,0.016366399824619293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,fp8,511,0.018308800458908082
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,fp8,255,0.014472000300884247
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,fp8,2047,0.017441600561141968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,bfloat16,bfloat16,2047,0.10616639852523804
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,fp8,4095,0.01780160069465637
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,bfloat16,4095,0.016953599452972413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,bfloat16,8191,0.017900800704956053
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,fp8,8191,0.019079999625682832
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,bfloat16,1,0.011724799871444702
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,fp8,1,0.012415999919176102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,bfloat16,3,0.011575999855995178
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,fp8,3,0.01226079985499382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,fp8,7,0.01225920021533966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,bfloat16,15,0.011521600186824799
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,fp8,15,0.012366399914026261
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,bfloat16,31,0.011641599982976914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,fp8,31,0.012427199631929398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,fp8,63,0.012374400347471236
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,bfloat16,127,0.011596799641847611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,fp8,127,0.012160000205039979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,bfloat16,255,0.013153600692749023
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,fp8,255,0.014215999841690063
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,bfloat16,bfloat16,15,0.024617600440979003
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,fp8,511,0.01815200001001358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,bfloat16,1023,0.015897600352764128
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,fp8,1023,0.016787199676036833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,bfloat16,2047,0.016220800578594208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,fp8,2047,0.01733119934797287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,bfloat16,4095,0.016814400255680085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,fp8,4095,0.0177824005484581
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,bfloat16,8191,0.01836320012807846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,fp8,8191,0.018787199258804323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,bfloat16,1,0.011524800211191177
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,fp8,1,0.012350399792194367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,bfloat16,3,0.011532799899578094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,fp8,3,0.012347199767827988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,bfloat16,7,0.011519999802112579
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,fp8,7,0.012303999811410903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,bfloat16,15,0.011531200259923935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,fp8,15,0.012246400117874146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,bfloat16,31,0.011535999923944473
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,fp8,31,0.01226079985499382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,bfloat16,63,0.011459200084209442
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,fp8,63,0.012398400157690049
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,bfloat16,127,0.011641599982976914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,bfloat16,1023,0.015489600598812103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,fp8,127,0.012379200011491776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,bfloat16,2047,0.01615999937057495
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,fp8,255,0.014404800534248353
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,bfloat16,fp8,1023,0.016624000668525696
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,fp8,511,0.018297599256038667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,bfloat16,511,0.016363200545310975
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,bfloat16,1023,0.015492799878120422
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,fp8,1023,0.016923199594020843
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,bfloat16,2047,0.016230399906635284
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,fp8,2047,0.01703999936580658
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,bfloat16,4095,0.016892799735069276
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,bfloat16,7,0.011598400026559829
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,fp8,4095,0.017718400061130523
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,bfloat16,8191,0.018881599605083465
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,fp8,8191,0.018972800672054292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,fp8,1,0.012392000108957291
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,bfloat16,3,0.01135680004954338
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,fp8,3,0.012228800356388092
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,bfloat16,7,0.011718399822711945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,bfloat16,63,0.01170239970088005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,fp8,7,0.012169600278139115
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,bfloat16,15,0.011582399904727935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,fp8,15,0.012462399899959564
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,bfloat16,31,0.01173280030488968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,bfloat16,63,0.011584000289440155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,fp8,63,0.012433599680662155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,bfloat16,127,0.011558400094509124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,fp8,127,0.012355200201272964
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,bfloat16,255,0.013406400382518769
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,fp8,255,0.014351999759674073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,bfloat16,511,0.016246399283409117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,fp8,511,0.018156799674034118
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,bfloat16,1023,0.015731200575828552
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,fp8,1023,0.017019200325012206
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,bfloat16,2047,0.016444799304008485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,fp8,2047,0.017318400740623473
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,bfloat16,bfloat16,511,0.016123199462890626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,fp8,4095,0.01786080002784729
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,bfloat16,8191,0.02245279997587204
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,bfloat16,1,0.011515200138092041
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,fp8,8191,0.022041599452495574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,fp8,1,0.012201599776744843
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,bfloat16,3,0.011529599875211715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,fp8,3,0.012399999797344208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,bfloat16,7,0.011372800171375274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,fp8,7,0.01239520013332367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,bfloat16,15,0.011715199798345566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,fp8,15,0.012219200283288956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,bfloat16,31,0.011574400216341018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,fp8,31,0.012294399738311767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,bfloat16,63,0.011588799953460693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,bfloat16,127,0.011747200042009354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,fp8,127,0.012307199835777282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,bfloat16,255,0.013064000010490417
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,fp8,255,0.014547200500965118
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,bfloat16,511,0.016123199462890626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,fp8,511,0.01812479943037033
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,bfloat16,1023,0.015878400206565856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,fp8,1023,0.01677920073270798
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,bfloat16,2047,0.01603199988603592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,fp8,2047,0.01754239946603775
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,bfloat16,4095,0.016784000396728515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,fp8,4095,0.017705599963665008
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,bfloat16,8191,0.018532800674438476
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,fp8,8191,0.01881439983844757
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,bfloat16,1,0.011454399675130844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,fp8,1,0.01228479966521263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,fp8,3,0.012270399928092956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,bfloat16,7,0.011558400094509124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,fp8,7,0.012276799976825714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,bfloat16,15,0.01167839989066124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,fp8,15,0.012406399846076966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,bfloat16,31,0.011777599900960922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,fp8,31,0.01241919994354248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,bfloat16,63,0.011771199852228164
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,fp8,63,0.012388800084590913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,bfloat16,127,0.01157120019197464
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,fp8,127,0.012348800152540206
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,bfloat16,255,0.01326880007982254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,fp8,255,0.0143327996134758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,bfloat16,bfloat16,255,0.013129599392414093
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,bfloat16,511,0.0162992000579834
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,fp8,511,0.018223999440670012
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,bfloat16,1023,0.01602399945259094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,fp8,1023,0.016972799599170686
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,bfloat16,2047,0.01639840006828308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,fp8,2047,0.017451199889183044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,bfloat16,4095,0.017374399304389953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,fp8,4095,0.018222400546073915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,bfloat16,8191,0.019468800723552705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,fp8,8191,0.019300800561904908
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,bfloat16,1,0.011577600240707397
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,fp8,1,0.012513600289821625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,fp8,31,0.012223999947309494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,fp8,3,0.012449599802494049
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,fp8,7,0.01241919994354248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,bfloat16,15,0.01165279969573021
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,fp8,15,0.012465599924325943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,bfloat16,31,0.01162559986114502
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,bfloat16,4095,0.018305599689483643
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,fp8,31,0.0125231996178627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,bfloat16,63,0.011563199758529662
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,fp8,63,0.012505599856376648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,bfloat16,127,0.01156959980726242
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,fp8,127,0.012540799379348756
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,bfloat16,255,0.013203200697898865
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,fp8,255,0.014478400349617004
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,bfloat16,511,0.016166399419307708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,fp8,511,0.01812639981508255
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,bfloat16,fp8,63,0.012196800112724305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,bfloat16,1023,0.01581120043992996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,fp8,1023,0.016974399983882903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,fp8,2047,0.017238399386405943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,bfloat16,2047,0.01690240055322647
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,bfloat16,4095,0.018193599581718446
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,fp8,4095,0.018036800622940063
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,bfloat16,8191,0.022017599642276765
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,fp8,8191,0.02189600020647049
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,bfloat16,1,0.01173119992017746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,fp8,1,0.012401600182056428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,bfloat16,3,0.011819200217723846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,fp8,3,0.012406399846076966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,bfloat16,7,0.011683200299739838
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,fp8,7,0.012544000148773193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,bfloat16,15,0.011580800265073776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,bfloat16,bfloat16,3,0.011315199732780456
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,bfloat16,31,0.011854399740695954
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,fp8,31,0.012352000176906585
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,bfloat16,63,0.011713600158691407
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,bfloat16,127,0.011713600158691407
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,fp8,127,0.012455999851226807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,bfloat16,255,0.013601599633693695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,fp8,255,0.014419199526309967
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,bfloat16,511,0.01661120057106018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,fp8,511,0.01801760047674179
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,bfloat16,1023,0.01610720008611679
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,fp8,1023,0.017083199322223665
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,bfloat16,2047,0.017900800704956053
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,fp8,2047,0.017689600586891174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,bfloat16,4095,0.021447999775409697
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,fp8,4095,0.02146719992160797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,bfloat16,8191,0.038078400492668155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,fp8,8191,0.027235201001167296
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,bfloat16,1,0.01290079951286316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,fp8,1,0.013142399489879608
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,bfloat16,3,0.012783999741077422
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,fp8,3,0.013222399353981017
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,bfloat16,7,0.01279039978981018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,fp8,7,0.01324319988489151
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,bfloat16,15,0.01284639984369278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,fp8,15,0.01345279961824417
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,fp8,31,0.013523200154304504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,bfloat16,63,0.012508800625801087
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,fp8,63,0.013545599579811097
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,bfloat16,127,0.012671999633312225
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,fp8,127,0.01358560025691986
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,bfloat16,255,0.01427839994430542
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,fp8,255,0.015056000649929046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,bfloat16,bfloat16,1,0.011608000099658965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,bfloat16,511,0.01722400039434433
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,fp8,511,0.01902240067720413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,fp8,1023,0.018115200102329254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,bfloat16,1023,0.019009600579738616
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,bfloat16,3,0.011617600172758102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,bfloat16,2047,0.02144480049610138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,fp8,2047,0.021631999313831328
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,bfloat16,4095,0.039631998538970946
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,bfloat16,bfloat16,7,0.01148959994316101
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,fp8,4095,0.028513601422309874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,bfloat16,8191,0.06101599931716919
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,bfloat16,1,0.012547199428081513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,bfloat16,3,0.012780800461769104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,fp8,3,0.013334399461746216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,bfloat16,7,0.01271200031042099
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,fp8,7,0.013257600367069244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,bfloat16,15,0.0126351997256279
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,fp8,15,0.013247999548912048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,bfloat16,31,0.012798400223255157
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,fp8,31,0.013278399407863618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,bfloat16,63,0.012758399546146392
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,fp8,63,0.013214400410652161
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,bfloat16,127,0.012905600666999816
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,fp8,127,0.01316000074148178
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,bfloat16,255,0.014662399888038635
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,fp8,255,0.015201599895954132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,bfloat16,511,0.01762239933013916
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,fp8,511,0.018862399458885192
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,bfloat16,1023,0.020640000700950623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,fp8,1023,0.021648000180721282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,bfloat16,2047,0.038796800374984744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,fp8,2047,0.027820798754692077
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,bfloat16,4095,0.060648000240325926
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,fp8,15,0.01239520013332367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,fp8,4095,0.04297119975090027
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,bfloat16,fp8,63,0.01252640038728714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,bfloat16,1,0.012545600533485413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,bfloat16,8191,0.10602400302886963
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,fp8,8191,0.06735519766807556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,bfloat16,3,0.012520000338554382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,fp8,3,0.013689599931240082
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,fp8,7,0.013364799320697784
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,bfloat16,15,0.012880000472068786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,fp8,15,0.013382400572299957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,bfloat16,31,0.012803199887275695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,fp8,31,0.013376000523567199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,bfloat16,63,0.012780800461769104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,fp8,63,0.013463999330997466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,bfloat16,127,0.01295360028743744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,fp8,127,0.013512000441551208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,bfloat16,255,0.014742399752140044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,fp8,255,0.015403200685977936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,bfloat16,511,0.018382400274276733
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,fp8,511,0.019070400297641753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,bfloat16,1023,0.036471998691558837
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,fp8,1023,0.026782399415969847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,bfloat16,2047,0.05972800254821777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,bfloat16,31,0.012835200130939483
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,bfloat16,4095,0.10306880474090577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,fp8,4095,0.06713280081748962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,fp8,8191,0.11750719547271729
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,bfloat16,8191,0.1890768051147461
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,bfloat16,1,0.016782400012016297
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,fp8,1,0.017824000120162962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,bfloat16,3,0.01677920073270798
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,fp8,3,0.017868800461292265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,bfloat16,7,0.01677920073270798
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,bfloat16,fp8,8191,0.044095999002456664
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,fp8,7,0.017862400412559508
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,bfloat16,15,0.016764800250530242
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,bfloat16,31,0.01668799966573715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,fp8,31,0.017955200374126436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,bfloat16,63,0.016702400147914888
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,fp8,63,0.017956799268722533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,bfloat16,127,0.01706400066614151
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,fp8,127,0.017969599366188048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,bfloat16,255,0.020921599864959717
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,fp8,255,0.02208160012960434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,bfloat16,511,0.03789759874343872
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,fp8,511,0.02966879904270172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,bfloat16,1023,0.060971200466156006
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,bfloat16,2047,0.10436639785766602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,fp8,2047,0.06965280175209046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,fp8,1,0.013840000331401824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,bfloat16,4095,0.18933759927749633
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,bfloat16,7,0.012569600343704223
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,fp8,4095,0.11961280107498169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,bfloat16,1,0.01175519973039627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,fp8,1,0.012483199685811996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,fp8,8191,0.21850240230560303
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,bfloat16,8191,0.3617568016052246
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,bfloat16,3,0.011687999963760376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,fp8,3,0.012404800206422806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,fp8,7,0.012510399520397186
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,bfloat16,7,0.01148959994316101
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,bfloat16,15,0.011681599915027619
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,fp8,15,0.01241919994354248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,bfloat16,31,0.011435200273990632
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,bfloat16,63,0.011695999652147293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,fp8,63,0.012129600346088409
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,bfloat16,127,0.011828800290822982
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,bfloat16,fp8,2047,0.041247999668121337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,fp8,127,0.012135999649763108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,bfloat16,255,0.013542400300502777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,fp8,255,0.014148800075054169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,bfloat16,511,0.0160303995013237
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,bfloat16,1023,0.01583999991416931
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,fp8,1023,0.01672320067882538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,fp8,2047,0.016942399740219116
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,bfloat16,2047,0.016145600378513335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,bfloat16,4095,0.016894400119781494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,fp8,4095,0.017899200320243835
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,bfloat16,8191,0.019064000248908995
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,fp8,8191,0.019431999325752257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,bfloat16,1,0.011459200084209442
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,fp8,1,0.012300799787044524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,bfloat16,3,0.01146719977259636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,fp8,3,0.012326399981975555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,bfloat16,7,0.011481600254774094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,fp8,7,0.01226240023970604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,bfloat16,fp8,1,0.013609600067138673
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,bfloat16,15,0.011548800021409988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,fp8,15,0.012273599952459335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,bfloat16,31,0.011711999773979187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,fp8,31,0.01226079985499382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,fp8,15,0.01804639995098114
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,fp8,63,0.012656000256538392
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,bfloat16,127,0.011428800225257874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,fp8,127,0.012449599802494049
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,bfloat16,255,0.013313600420951843
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,fp8,255,0.014035199582576752
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,bfloat16,511,0.016446399688720702
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,bfloat16,1023,0.015771199762821198
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,fp8,1023,0.017340800166130065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,fp8,511,0.01804639995098114
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,bfloat16,2047,0.01618400067090988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,fp8,2047,0.017582400143146514
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,bfloat16,4095,0.018593600392341612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,fp8,4095,0.01783200055360794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,bfloat16,8191,0.02216159999370575
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,bfloat16,1,0.011529599875211715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,bfloat16,fp8,1023,0.046644800901412965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,fp8,1,0.0125231996178627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,bfloat16,3,0.011523199826478958
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,bfloat16,7,0.011748799681663513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,fp8,7,0.012316799908876418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,fp8,3,0.01247360035777092
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,bfloat16,15,0.011897599697113037
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,fp8,15,0.012324800342321396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,bfloat16,31,0.011776000261306763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,fp8,31,0.012641599774360657
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,bfloat16,63,0.011622399836778641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,fp8,63,0.012587200105190276
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,bfloat16,127,0.011798399686813354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,fp8,255,0.014579200744628906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,bfloat16,255,0.013684800267219544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,bfloat16,511,0.016683200001716615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,fp8,511,0.018038399517536163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,bfloat16,1023,0.01631679981946945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,fp8,1023,0.017084799706935883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,bfloat16,2047,0.018163199722766876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,fp8,2047,0.017739200592041017
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,fp8,4095,0.02141920030117035
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,bfloat16,8191,0.037350401282310486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,fp8,8191,0.02732959985733032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,fp8,31,0.012433599680662155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,bfloat16,1,0.011872000247240066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,fp8,1,0.012654399871826172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,bfloat16,3,0.011840000003576278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,fp8,3,0.01266079992055893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,bfloat16,7,0.011860799789428712
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,fp8,7,0.012571200728416443
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,bfloat16,15,0.01196959987282753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,fp8,15,0.01257600039243698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,fp8,31,0.01252799928188324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,bfloat16,31,0.011907199770212174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,bfloat16,63,0.0118367999792099
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,fp8,63,0.012692800164222718
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,bfloat16,127,0.012051200121641159
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,fp8,127,0.012755200266838074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,bfloat16,255,0.013699199259281158
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,fp8,255,0.014668799936771393
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,fp8,511,0.018198400735855103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,bfloat16,1023,0.01767839938402176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,bfloat16,63,0.011443199962377549
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,fp8,1023,0.017324799299240114
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,bfloat16,2047,0.019897599518299103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,fp8,2047,0.02101760059595108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,bfloat16,4095,0.03720479905605316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,fp8,4095,0.02735520005226135
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,bfloat16,8191,0.05892800092697144
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,fp8,8191,0.0422111988067627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,bfloat16,1,0.015145599842071533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,bfloat16,fp8,8191,0.021873599290847777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,bfloat16,3,0.01505119949579239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,fp8,1,0.015647999942302704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,fp8,3,0.0160288006067276
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,bfloat16,7,0.015052799880504609
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,fp8,7,0.015587200224399567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,bfloat16,15,0.015355199575424194
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,fp8,15,0.01576640009880066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,fp8,31,0.01605280041694641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,bfloat16,63,0.015212799608707427
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,fp8,127,0.012118399888277055
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,fp8,63,0.016064000129699708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,bfloat16,127,0.015545600652694702
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,fp8,127,0.01552799940109253
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,fp8,255,0.01800000071525574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,bfloat16,511,0.020479999482631683
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,fp8,511,0.021238400042057036
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,bfloat16,1023,0.03978239893913269
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,fp8,1023,0.030024001002311708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,bfloat16,2047,0.0608784019947052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,fp8,2047,0.044627198576927186
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,bfloat16,4095,0.10469759702682495
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,bfloat16,fp8,511,0.018331199884414673
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,bfloat16,1,0.01828960031270981
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,fp8,4095,0.06971200108528137
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,fp8,1,0.019814400374889372
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,bfloat16,3,0.018580800294876097
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,fp8,3,0.01990399956703186
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,bfloat16,7,0.018681600689888
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,fp8,7,0.01987359970808029
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,bfloat16,15,0.01876319944858551
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,fp8,15,0.019900800287723543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,bfloat16,31,0.018615999817848207
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,fp8,31,0.01993120014667511
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,bfloat16,63,0.01857600063085556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,fp8,63,0.01995680034160614
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,bfloat16,127,0.018828800320625304
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,fp8,127,0.019985599815845488
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,bfloat16,255,0.0233024001121521
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,fp8,255,0.02385119944810867
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,bfloat16,511,0.039926400780677794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,fp8,511,0.032216000556945804
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,fp8,1023,0.04859839975833893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,bfloat16,2047,0.10794399976730347
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,fp8,2047,0.07302719950675965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,bfloat16,31,0.01486240029335022
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,bfloat16,bfloat16,255,0.017788800597190856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,bfloat16,4095,0.19398239850997925
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,fp8,4095,0.12316479682922363
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,bfloat16,1,0.026414400339126586
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,fp8,1,0.029207998514175416
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,fp8,3,0.02935200035572052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,bfloat16,3,0.02600640058517456
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,bfloat16,7,0.025918400287628172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,bfloat16,bfloat16,4095,0.022195200622081756
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,fp8,7,0.029420799016952513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,fp8,15,0.029014399647712706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,bfloat16,15,0.02606880068778992
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,fp8,31,0.0294048011302948
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,bfloat16,31,0.02637760043144226
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,bfloat16,63,0.02595199942588806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,fp8,63,0.028935998678207397
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,bfloat16,127,0.026715201139450074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,fp8,127,0.0295632004737854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,bfloat16,255,0.04515999853610993
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,fp8,255,0.03803359866142273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,fp8,511,0.055164802074432376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,bfloat16,1023,0.11152960062026977
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,fp8,1023,0.0803056001663208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,fp8,2047,0.13032640218734742
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,bfloat16,2047,0.1966223955154419
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,bfloat16,1,0.0408048003911972
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,fp8,1,0.04641759991645813
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,fp8,4095,0.2283695936203003
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,bfloat16,3,0.04116640090942383
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,bfloat16,4095,0.3671567916870117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,fp8,3,0.046430400013923644
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,fp8,7,0.0471455991268158
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,bfloat16,7,0.040375998616218566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,fp8,15,0.046475198864936826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,bfloat16,15,0.04132960140705109
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,bfloat16,31,0.04103679955005646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,fp8,31,0.046881601214408875
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,bfloat16,bfloat16,511,0.01669919937849045
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,bfloat16,63,0.04123679995536804
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,fp8,63,0.04739840030670166
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,fp8,127,0.04880000054836273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,bfloat16,127,0.04869759976863861
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,bfloat16,255,0.07290400266647339
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,fp8,255,0.06610239744186401
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,fp8,511,0.0957423985004425
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,bfloat16,511,0.11483199596405029
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,bfloat16,1023,0.20276319980621338
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,fp8,1023,0.14678080081939698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,bfloat16,1,0.02149440050125122
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,fp8,2047,0.24036319255828859
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,bfloat16,2047,0.3704256057739258
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,fp8,1,0.022336000204086305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,bfloat16,bfloat16,1023,0.06461759805679321
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,bfloat16,3,0.02072799950838089
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,bfloat16,7,0.021007999777793884
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,fp8,7,0.023004800081253052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,bfloat16,15,0.020742399990558623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,fp8,3,0.022896000742912294
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,bfloat16,31,0.021380800008773803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,fp8,31,0.02232159972190857
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,bfloat16,63,0.0212336003780365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,fp8,4095,0.443665599822998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,bfloat16,bfloat16,4095,0.7090288162231445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,bfloat16,127,0.02130240052938461
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,fp8,127,0.02282879948616028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,bfloat16,511,0.04258399903774261
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,bfloat16,255,0.0257423996925354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,fp8,511,0.03423520028591156
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,fp8,255,0.027372801303863527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,bfloat16,1023,0.06941279768943787
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,bfloat16,1,0.028692799806594848
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,fp8,1023,0.05135840177536011
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,fp8,1,0.03146719932556152
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,bfloat16,3,0.028019198775291444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,bfloat16,7,0.028014400601387025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,bfloat16,15,0.0280239999294281
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,fp8,7,0.03185440003871918
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,fp8,15,0.03150399923324585
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,bfloat16,31,0.02815679907798767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,bfloat16,63,0.028227201104164122
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,fp8,63,0.03129920065402984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,bfloat16,127,0.0302047997713089
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,fp8,127,0.03121120035648346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,bfloat16,255,0.047219198942184445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,bfloat16,bfloat16,511,0.06619200110435486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,fp8,255,0.040011200308799746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,bfloat16,511,0.06929119825363159
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,fp8,511,0.057222402095794676
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,bfloat16,1023,0.11636960506439209
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,bfloat16,1,0.04333440065383911
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,fp8,1,0.04870879948139191
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,bfloat16,3,0.04339039921760559
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,fp8,3,0.048732799291610715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,bfloat16,7,0.04336319863796234
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,fp8,7,0.048790401220321654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,bfloat16,15,0.04321120083332062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,fp8,15,0.04873439967632294
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,bfloat16,31,0.04311679899692535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,fp8,31,0.04878720045089722
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,bfloat16,63,0.04523519873619079
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,bfloat16,127,0.05296639800071716
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,fp8,127,0.05125759840011597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,bfloat16,255,0.07476800084114074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,fp8,255,0.06884959936141968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,fp8,15,0.023107199370861052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,bfloat16,511,0.11932159662246704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,bfloat16,fp8,63,0.022942399978637694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,fp8,511,0.09711040258407592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,bfloat16,1,0.07138559818267823
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,bfloat16,1023,0.20651679039001464
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,fp8,1023,0.14820159673690797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,fp8,1,0.08459839820861817
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,fp8,3,0.08383679986000062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,bfloat16,7,0.0712336003780365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,fp8,3,0.03195520043373108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,fp8,7,0.08393599987030029
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,bfloat16,15,0.07144160270690918
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,fp8,15,0.08398079872131348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,bfloat16,31,0.07108479738235474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,fp8,31,0.03210079967975617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,fp8,31,0.08447679877281189
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,bfloat16,63,0.077811199426651
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,fp8,63,0.08603839874267578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,bfloat16,127,0.0831824004650116
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,fp8,127,0.09747200012207032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,bfloat16,255,0.12781440019607543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,fp8,255,0.11993440389633178
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,bfloat16,511,0.2169424057006836
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,bfloat16,fp8,1023,0.08315520286560059
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,fp8,511,0.17977919578552246
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,bfloat16,1,0.03474720120429993
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,fp8,1,0.03707039952278137
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,fp8,3,0.03765760064125061
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,bfloat16,7,0.03394080102443695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,fp8,1023,0.2800623893737793
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,bfloat16,1023,0.37884480953216554
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,fp8,7,0.0377023994922638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,bfloat16,15,0.03405919969081879
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,fp8,15,0.03735359907150269
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,bfloat16,63,0.034083199501037595
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,bfloat16,31,0.03420799970626831
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,fp8,63,0.037427198886871335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,bfloat16,127,0.03872799873352051
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,fp8,127,0.03741919994354248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,bfloat16,255,0.05345600247383118
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,bfloat16,fp8,63,0.04893440008163452
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,bfloat16,511,0.07617759704589844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,bfloat16,1,0.04834559857845307
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,fp8,1,0.05445600152015686
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,fp8,3,0.054492801427841187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,bfloat16,7,0.04871839880943298
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,fp8,7,0.05456640124320984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,bfloat16,15,0.04896639883518219
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,fp8,15,0.05442720055580139
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,bfloat16,31,0.048625600337982175
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,fp8,31,0.05434719920158386
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,bfloat16,63,0.05077279806137085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,fp8,63,0.05483520030975342
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,bfloat16,bfloat16,3,0.07163680195808411
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,bfloat16,127,0.057968002557754514
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,fp8,127,0.0576960027217865
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,bfloat16,255,0.08177440166473389
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,fp8,255,0.07367200255393982
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,bfloat16,511,0.1253615975379944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,fp8,511,0.10339360237121582
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,bfloat16,1,0.07698559761047363
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,fp8,1,0.09012640118598939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,bfloat16,3,0.07748640179634095
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,bfloat16,7,0.07778720259666443
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,fp8,7,0.09037280082702637
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,bfloat16,3,0.034255999326705935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,bfloat16,15,0.0778335988521576
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,fp8,15,0.08889120221138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,bfloat16,31,0.07778720259666443
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,fp8,31,0.08888959884643555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,fp8,31,0.03758560121059418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,bfloat16,63,0.08339040279388428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,fp8,63,0.09321920275688171
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,bfloat16,127,0.08702560067176819
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,fp8,127,0.09772800207138062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,fp8,255,0.0456959992647171
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,bfloat16,255,0.1273136019706726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,bfloat16,fp8,511,0.0631600022315979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,fp8,255,0.12444159984588624
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,bfloat16,bfloat16,3,0.048368000984191896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,fp8,511,0.1820479989051819
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,bfloat16,1,0.12917120456695558
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,bfloat16,3,0.13059359788894653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,fp8,1,0.15604480504989623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,fp8,3,0.15561280250549317
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,bfloat16,7,0.13042399883270264
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,fp8,7,0.1564703941345215
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,bfloat16,15,0.13089599609375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,fp8,15,0.15614240169525145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,bfloat16,31,0.1384112000465393
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,bfloat16,63,0.1408911943435669
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,bfloat16,127,0.14968960285186766
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,fp8,63,0.16882400512695311
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,fp8,127,0.17968159914016724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,bfloat16,255,0.23021440505981444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,fp8,255,0.22231199741363525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,bfloat16,1,0.011636800318956374
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,fp8,1,0.0124719999730587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,bfloat16,3,0.011737599968910217
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,fp8,3,0.012350399792194367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,bfloat16,511,0.4187136173248291
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,bfloat16,7,0.011767999827861786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,fp8,7,0.012481600046157837
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,fp8,511,0.3428767919540405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,fp8,15,0.01255040019750595
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,bfloat16,31,0.011785600334405899
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,bfloat16,15,0.011840000003576278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,bfloat16,63,0.01178399994969368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,fp8,63,0.012470400333404541
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,bfloat16,127,0.011868800222873687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,fp8,127,0.012548799812793731
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,bfloat16,255,0.013337600231170654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,fp8,255,0.014496000111103058
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,bfloat16,511,0.016339200735092162
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,fp8,511,0.01812320053577423
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,bfloat16,1023,0.016302399337291718
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,fp8,1023,0.017059199512004852
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,bfloat16,2047,0.016622400283813475
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,fp8,2047,0.017591999471187593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,bfloat16,4095,0.018353599309921264
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,fp8,4095,0.01818400025367737
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,bfloat16,8191,0.021967999637126923
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,fp8,8191,0.022041599452495574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,bfloat16,1,0.011766400188207626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,fp8,1,0.012564800679683685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,bfloat16,3,0.01170400008559227
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,fp8,3,0.012529599666595458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,fp8,3,0.08983039855957031
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,bfloat16,7,0.011847999691963196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,fp8,7,0.012521600723266602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,bfloat16,15,0.011801599711179733
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,bfloat16,31,0.011720000207424164
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,fp8,31,0.012483199685811996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,bfloat16,63,0.01180960014462471
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,fp8,15,0.012811200320720672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,fp8,63,0.01249760016798973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,bfloat16,127,0.011883199959993363
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,fp8,127,0.012529599666595458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,fp8,255,0.014414399862289429
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,bfloat16,255,0.013728000223636627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,bfloat16,511,0.016492800414562227
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,fp8,511,0.018111999332904815
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,bfloat16,1023,0.016323199868202208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,fp8,1023,0.01749120056629181
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,bfloat16,2047,0.018001599609851836
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,fp8,2047,0.01772959977388382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,bfloat16,4095,0.02143999934196472
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,bfloat16,8191,0.03833119869232178
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,fp8,4095,0.021859200298786165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,fp8,1,0.01268479973077774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,bfloat16,3,0.012187200039625168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,fp8,3,0.012649600207805634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,bfloat16,7,0.012100800126791
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,fp8,7,0.012695999443531036
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,bfloat16,15,0.01207360029220581
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,fp8,15,0.012652799487113953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,fp8,31,0.012768000364303589
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,bfloat16,31,0.012060800194740295
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,bfloat16,bfloat16,511,0.21805601119995116
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,bfloat16,63,0.011959999799728394
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,fp8,63,0.012971200048923492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,fp8,127,0.012828800082206725
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,bfloat16,127,0.012089599668979645
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,bfloat16,255,0.014033600687980652
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,fp8,255,0.014587199687957764
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,bfloat16,511,0.01698080003261566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,fp8,511,0.018379199504852294
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,fp8,1023,0.017297600209712983
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,bfloat16,1023,0.01793919950723648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,bfloat16,fp8,31,0.15677599906921386
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,fp8,2047,0.02140959948301315
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,bfloat16,4095,0.037529599666595456
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,fp8,4095,0.02804799973964691
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,bfloat16,8191,0.06050400137901306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,bfloat16,1,0.012296000123023986
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,fp8,8191,0.04262399971485138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,fp8,1,0.013048000633716583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,bfloat16,3,0.012377600371837615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,fp8,3,0.012971200048923492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,bfloat16,7,0.012363199889659882
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,fp8,7,0.012940800189971924
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,fp8,15,0.012960000336170197
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,bfloat16,31,0.01212640032172203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,fp8,31,0.01281919926404953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,bfloat16,63,0.012127999961376191
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,fp8,63,0.012980799376964568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,bfloat16,127,0.012489599734544754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,fp8,127,0.01292639970779419
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,bfloat16,255,0.014027200639247894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,fp8,255,0.014875200390815736
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,bfloat16,511,0.01736160069704056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,fp8,511,0.018467199802398682
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,bfloat16,1023,0.020295999944210052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,fp8,1023,0.0210207998752594
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,bfloat16,2047,0.03752799928188324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,fp8,2047,0.027635198831558228
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,bfloat16,4095,0.05948160290718078
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,fp8,4095,0.042559999227523806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,bfloat16,8191,0.10236799716949463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,bfloat16,fp8,8191,0.027983999252319335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,bfloat16,1,0.05505920052528381
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,fp8,1,0.06087999939918518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,bfloat16,3,0.055067199468612674
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,fp8,3,0.06093760132789612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,bfloat16,7,0.0550495982170105
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,fp8,7,0.060838401317596436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,bfloat16,15,0.05512160062789917
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,bfloat16,31,0.05522879958152771
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,fp8,15,0.06097440123558044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,fp8,31,0.061273598670959474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,bfloat16,63,0.06074079871177673
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,fp8,63,0.06182399988174438
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,bfloat16,127,0.06567680239677429
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,bfloat16,2047,0.019508799910545348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,fp8,127,0.06675519943237304
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,bfloat16,255,0.08695039749145508
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,bfloat16,1,0.08321279883384705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,bfloat16,fp8,255,0.07940800189971924
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,fp8,1,0.09444479942321778
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,fp8,3,0.0950927972793579
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,bfloat16,7,0.08256000280380249
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,bfloat16,15,0.08260160088539123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,bfloat16,15,0.012278400361537933
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,fp8,15,0.09461439847946167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,bfloat16,fp8,31,0.012318400293588638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,bfloat16,31,0.08506079912185668
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,fp8,31,0.09490240216255189
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,bfloat16,63,0.08939359784126281
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,fp8,63,0.10107359886169434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,bfloat16,127,0.09588479995727539
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,fp8,127,0.10381280183792115
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,fp8,255,0.1290336012840271
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,bfloat16,255,0.13540480136871338
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,bfloat16,1,0.13778239488601685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,bfloat16,3,0.13866080045700074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,fp8,1,0.16178239583969117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,fp8,3,0.16197279691696168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,bfloat16,7,0.1386863946914673
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,fp8,7,0.16166720390319825
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,bfloat16,fp8,8191,0.06751999855041504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,bfloat16,15,0.13865760564804078
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,fp8,15,0.16340160369873047
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,bfloat16,31,0.1440608024597168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,fp8,31,0.166867196559906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,bfloat16,63,0.14658880233764648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,fp8,63,0.1735648036003113
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,fp8,127,0.17919039726257324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,bfloat16,255,0.2315135955810547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,fp8,255,0.22873120307922362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,bfloat16,1,0.24410560131072997
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,fp8,1,0.2908735990524292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,bfloat16,3,0.24403040409088134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,fp8,3,0.2938528060913086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,bfloat16,7,0.24666879177093506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,fp8,7,0.29183039665222166
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,bfloat16,3,0.08364319801330566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,bfloat16,15,0.2583136081695557
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,fp8,15,0.2919487953186035
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,bfloat16,31,0.26218879222869873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,fp8,31,0.31446878910064696
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,bfloat16,63,0.2642319917678833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,bfloat16,fp8,7,0.094896000623703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,fp8,63,0.31718719005584717
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,bfloat16,bfloat16,1,0.0966048002243042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,bfloat16,127,0.2828304052352905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,fp8,127,0.3378911972045898
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,bfloat16,fp8,1,0.1076464056968689
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,bfloat16,255,0.43155522346496583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,bfloat16,bfloat16,7,0.09661279916763306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,bfloat16,fp8,3,0.10767680406570435
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,bfloat16,bfloat16,15,0.09903839826583863
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,bfloat16,fp8,7,0.1065440058708191
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,bfloat16,bfloat16,31,0.1009600043296814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,bfloat16,fp8,31,0.10946079492568969
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,bfloat16,bfloat16,63,0.10559040307998657
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,bfloat16,fp8,63,0.1139456033706665
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,bfloat16,fp8,127,0.11618239879608154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,bfloat16,bfloat16,1,0.012052799761295318
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,bfloat16,bfloat16,1,0.1498128056526184
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,bfloat16,fp8,1,0.1734096050262451
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,bfloat16,bfloat16,3,0.15032000541687013
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,bfloat16,bfloat16,127,0.15705599784851074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,bfloat16,fp8,3,0.17311840057373046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,bfloat16,bfloat16,7,0.15086560249328612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,bfloat16,fp8,7,0.17191840410232545
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,bfloat16,bfloat16,15,0.15444159507751465
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,bfloat16,fp8,15,0.1742095947265625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,bfloat16,bfloat16,31,0.15481120347976685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,bfloat16,fp8,31,0.1820032000541687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,bfloat16,bfloat16,63,0.1601423978805542
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,bfloat16,fp8,63,0.18315199613571168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,bfloat16,bfloat16,127,0.17219200134277343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,bfloat16,fp8,127,0.18626240491867066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,bfloat16,bfloat16,1,0.2623631954193115
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,bfloat16,bfloat16,3,0.26201601028442384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,bfloat16,fp8,1,0.3109568119049072
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,bfloat16,fp8,3,0.3090303897857666
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,bfloat16,bfloat16,7,0.2627343893051147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,bfloat16,fp8,7,0.31186399459838865
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,bfloat16,bfloat16,15,0.2647504091262817
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,bfloat16,fp8,15,0.3151024103164673
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,bfloat16,bfloat16,3,0.09659680128097534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,bfloat16,fp8,255,0.42606401443481445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,bfloat16,fp8,31,0.32024641036987306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,bfloat16,fp8,15,0.10827840566635132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,bfloat16,bfloat16,63,0.2697232007980347
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,bfloat16,fp8,63,0.3254064083099365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,bfloat16,bfloat16,127,0.2928319931030273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,bfloat16,fp8,127,0.3300928115844727
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,bfloat16,bfloat16,1,0.49500160217285155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,bfloat16,bfloat16,127,0.1126207947731018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,bfloat16,bfloat16,3,0.4895535945892334
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,bfloat16,fp8,3,0.5776351928710938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,bfloat16,bfloat16,7,0.4889376163482666
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,bfloat16,bfloat16,15,0.4929808139801025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,bfloat16,fp8,15,0.60208158493042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,bfloat16,fp8,31,0.601416015625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,bfloat16,bfloat16,63,0.502345609664917
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,bfloat16,fp8,63,0.6042543888092041
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,bfloat16,bfloat16,127,0.5367695808410644
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,bfloat16,1,0.0117296002805233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,bfloat16,3,0.011950399726629257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,fp8,3,0.012439999729394913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,fp8,1,0.012307199835777282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,bfloat16,7,0.01196959987282753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,bfloat16,15,0.011547199636697768
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,fp8,7,0.012412799894809723
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,bfloat16,31,0.01157120019197464
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,fp8,15,0.012814399600028992
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,fp8,31,0.012465599924325943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,bfloat16,63,0.011726400256156922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,fp8,63,0.012718400359153748
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,bfloat16,127,0.011771199852228164
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,fp8,127,0.012241599708795547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,bfloat16,255,0.013902400434017182
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,fp8,255,0.014347200095653535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,bfloat16,bfloat16,31,0.26904160976409913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,fp8,511,0.018606400489807128
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,bfloat16,1023,0.016123199462890626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,fp8,1023,0.01727679967880249
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,bfloat16,2047,0.01855359971523285
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,fp8,2047,0.017497600615024568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,bfloat16,4095,0.021478399634361267
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,fp8,4095,0.02171359956264496
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,bfloat16,8191,0.038878399133682254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,fp8,8191,0.028233599662780762
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,bfloat16,1,0.012388800084590913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,bfloat16,3,0.011950399726629257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,fp8,1,0.012529599666595458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,fp8,3,0.012904000282287598
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,bfloat16,7,0.01191679984331131
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,fp8,7,0.012539200484752655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,bfloat16,15,0.012176000326871873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,fp8,15,0.012649600207805634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,bfloat16,31,0.011902400106191636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,bfloat16,63,0.011953599750995636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,bfloat16,fp8,1,0.5610095977783203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,bfloat16,127,0.012044800072908401
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,bfloat16,255,0.013766400516033173
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,fp8,127,0.012700800597667695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,fp8,255,0.014641599357128143
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,bfloat16,511,0.016683200001716615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,fp8,511,0.018291200697422027
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,bfloat16,1023,0.017641599476337432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,fp8,1023,0.017260800302028655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,bfloat16,2047,0.01979999989271164
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,fp8,2047,0.020908799767494202
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,bfloat16,bfloat16,31,0.4962751865386963
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,fp8,4095,0.027988800406455995
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,bfloat16,8191,0.06028159856796265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,fp8,8191,0.04332000017166138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,bfloat16,1,0.012387199699878693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,fp8,1,0.01308799982070923
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,bfloat16,3,0.012353599816560746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,fp8,3,0.013091200590133667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,bfloat16,7,0.012214399874210358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,fp8,7,0.012963199615478515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,bfloat16,15,0.012263999879360199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,bfloat16,fp8,127,0.6258255958557128
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,fp8,15,0.012980799376964568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,fp8,31,0.012945599853992462
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,bfloat16,63,0.01226079985499382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,fp8,63,0.012947200238704682
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,bfloat16,bfloat16,511,0.016120000183582305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,bfloat16,127,0.012246400117874146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,fp8,127,0.012611199915409089
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,bfloat16,255,0.014028799533843995
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,fp8,255,0.014564800262451171
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,bfloat16,511,0.017081600427627564
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,fp8,511,0.01810719966888428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,bfloat16,1023,0.02065120041370392
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,fp8,1023,0.020454399287700653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,bfloat16,2047,0.038022398948669434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,fp8,2047,0.026553601026535034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,bfloat16,4095,0.060262399911880496
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,fp8,4095,0.041975998878479005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,bfloat16,8191,0.10380480289459229
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,fp8,8191,0.06665440201759339
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,fp8,31,0.01284639984369278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,fp8,63,0.01257600039243698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,bfloat16,1,0.012361600250005721
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,bfloat16,3,0.012494400143623352
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,bfloat16,7,0.012300799787044524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,fp8,3,0.012937599420547485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,fp8,7,0.013318400084972381
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,bfloat16,15,0.012355200201272964
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,fp8,15,0.01295360028743744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,bfloat16,31,0.01244800016283989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,fp8,31,0.01311199963092804
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,bfloat16,127,0.012577599287033081
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,bfloat16,63,0.012230399996042252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,fp8,127,0.012982399761676788
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,bfloat16,255,0.01385599970817566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,bfloat16,fp8,7,0.5969183921813965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,fp8,255,0.015131199359893798
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,bfloat16,511,0.01690559983253479
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,fp8,511,0.018568000197410582
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,bfloat16,1023,0.03671680092811584
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,fp8,1023,0.025673601031303405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,bfloat16,bfloat16,4095,0.03826560080051422
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,bfloat16,2047,0.05803520083427429
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,fp8,2047,0.04198080003261566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,bfloat16,4095,0.1011855959892273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,fp8,4095,0.06513280272483826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,bfloat16,1,0.013051199913024902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,fp8,1,0.014047999680042268
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,bfloat16,8191,0.18739999532699586
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,bfloat16,3,0.013049599528312684
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,fp8,3,0.01387999951839447
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,fp8,7,0.013974399864673614
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,bfloat16,7,0.013316799700260163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,bfloat16,15,0.01324480026960373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,fp8,15,0.013918399810791016
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,bfloat16,31,0.013206399977207184
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,fp8,31,0.0139055997133255
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,bfloat16,63,0.013208000361919403
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,bfloat16,127,0.01327199935913086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,fp8,127,0.014059199392795563
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,bfloat16,255,0.014764800667762756
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,fp8,255,0.01584160029888153
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,bfloat16,511,0.01833280026912689
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,fp8,511,0.019433599710464478
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,bfloat16,1023,0.021092799305915833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,fp8,1023,0.022147199511528014
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,bfloat16,2047,0.039719998836517334
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,fp8,2047,0.028787198662757873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,bfloat16,4095,0.061267197132110596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,fp8,4095,0.04301440119743347
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,bfloat16,8191,0.10630719661712647
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,fp8,8191,0.06767359972000123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,fp8,16383,0.11870559453964233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,bfloat16,16383,0.19291679859161376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,bfloat16,1,0.013463999330997466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,fp8,1,0.014542399346828461
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,bfloat16,3,0.013462400436401368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,fp8,3,0.014535999298095703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,bfloat16,7,0.0133200004696846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,fp8,7,0.01451359987258911
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,bfloat16,15,0.013326400518417358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,fp8,15,0.01451359987258911
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,bfloat16,31,0.013326400518417358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,fp8,31,0.014500799775123595
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,bfloat16,63,0.013344000279903411
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,fp8,1,0.012756800651550293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,fp8,63,0.014511999487876893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,bfloat16,127,0.013489599525928497
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,fp8,127,0.014403200149536133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,bfloat16,255,0.015216000378131866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,fp8,255,0.01639840006828308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,bfloat16,511,0.018596799671649934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,fp8,511,0.020420800149440765
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,fp8,63,0.01340319961309433
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,bfloat16,1023,0.03781599998474121
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,fp8,1023,0.02826879918575287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,bfloat16,2047,0.06017439961433411
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,fp8,2047,0.04169760048389435
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,bfloat16,4095,0.10403519868850708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,fp8,4095,0.06747199892997742
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,bfloat16,8191,0.18920799493789672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,fp8,8191,0.11781920194625854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,bfloat16,fp8,8191,0.11590559482574463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,bfloat16,3,0.01743520051240921
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,bfloat16,16383,0.36503360271453855
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,fp8,3,0.01886080056428909
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,bfloat16,fp8,63,0.01401440054178238
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,fp8,7,0.018828800320625304
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,bfloat16,7,0.01727039963006973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,bfloat16,15,0.017070400714874267
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,fp8,31,0.018475200235843658
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,bfloat16,63,0.017107200622558594
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,fp8,63,0.018513600528240203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,bfloat16,127,0.01706240028142929
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,bfloat16,bfloat16,31,0.011907199770212174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,fp8,127,0.01844799965620041
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,bfloat16,255,0.021993599832057953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,fp8,255,0.02252960056066513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,fp8,511,0.030427199602127076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,bfloat16,1023,0.06248639822006226
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,fp8,1023,0.0474128007888794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,bfloat16,2047,0.10616159439086914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,fp8,2047,0.0710752010345459
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,bfloat16,4095,0.1919535994529724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,fp8,4095,0.12200000286102294
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,bfloat16,8191,0.36257760524749755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,bfloat16,1,0.017158399522304534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,fp8,8191,0.22076001167297363
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,bfloat16,fp8,16383,0.21731200218200683
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,bfloat16,1,0.024718399345874786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,fp8,1,0.027475199103355406
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,fp8,15,0.01879040002822876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,bfloat16,3,0.024563199281692503
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,bfloat16,31,0.017375999689102174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,fp8,3,0.027531200647354127
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,bfloat16,7,0.0244719997048378
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,bfloat16,15,0.024953599274158477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,fp8,7,0.02755360007286072
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,fp8,15,0.02746239900588989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,fp8,16383,0.41728639602661133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,fp8,31,0.027603200078010558
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,bfloat16,63,0.024719999730587007
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,bfloat16,127,0.025435200333595274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,fp8,127,0.027551999688148497
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,bfloat16,255,0.04316799938678741
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,fp8,255,0.03652159869670868
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,bfloat16,511,0.03767040073871612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,bfloat16,511,0.06333119869232177
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,fp8,511,0.052825599908828735
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,fp8,1023,0.07892159819602966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,bfloat16,2047,0.19429919719696045
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,fp8,2047,0.12667360305786132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,bfloat16,4095,0.36284480094909666
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,bfloat16,8191,0.7046976089477539
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,fp8,8191,0.4261023998260498
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,fp8,1,0.018892799317836762
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,bfloat16,1,0.011475200206041336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,fp8,1,0.012203200161457062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,bfloat16,3,0.011907199770212174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,fp8,3,0.012148799747228623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,bfloat16,7,0.01146719977259636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,fp8,7,0.012191999703645706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,bfloat16,15,0.011579199880361556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,fp8,15,0.012336000055074691
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,fp8,16383,0.8251184463500977
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,bfloat16,31,0.02497120052576065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,bfloat16,31,0.011622399836778641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,bfloat16,bfloat16,16383,0.7065375804901123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,fp8,63,0.012303999811410903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,bfloat16,63,0.011488000303506852
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,fp8,63,0.02752000093460083
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,fp8,31,0.012222400307655335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,bfloat16,255,0.013156799972057343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,bfloat16,127,0.011503999680280685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,fp8,255,0.014323200285434722
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,bfloat16,16383,1.396008014678955
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,fp8,511,0.018203200399875642
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,bfloat16,511,0.01626880019903183
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,bfloat16,1023,0.015483200550079346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,fp8,1023,0.016729600727558136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,fp8,2047,0.01703840047121048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,bfloat16,8191,0.018171200156211854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,fp8,4095,0.017638400197029114
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,fp8,8191,0.01866080015897751
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,bfloat16,2047,0.015828800201416016
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,bfloat16,16383,0.020633600652217865
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,fp8,16383,0.02096640020608902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,bfloat16,1,0.011367999762296677
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,fp8,1,0.01223680004477501
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,bfloat16,3,0.011566399782896041
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,fp8,3,0.012145599722862244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,bfloat16,7,0.011484800279140473
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,fp8,7,0.012361600250005721
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,fp8,15,0.01226079985499382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,bfloat16,31,0.011638399958610535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,fp8,31,0.01223680004477501
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,bfloat16,63,0.01168000027537346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,fp8,63,0.012275200337171555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,bfloat16,127,0.01165440008044243
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,fp8,127,0.012256000190973282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,bfloat16,255,0.013204799592494964
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,fp8,255,0.01424960047006607
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,bfloat16,511,0.016094399988651274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,fp8,511,0.018265600502490997
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,bfloat16,1023,0.015615999698638916
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,fp8,1023,0.01684480011463165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,bfloat16,2047,0.01610559970140457
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,fp8,2047,0.01701759994029999
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,bfloat16,4095,0.016459199786186218
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,fp8,4095,0.017505599558353423
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,bfloat16,8191,0.018228800594806673
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,fp8,8191,0.018991999328136444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,bfloat16,16383,0.020550400018692017
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,fp8,16383,0.020924800634384157
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,bfloat16,1,0.011451199650764465
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,fp8,1,0.012435200065374375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,bfloat16,3,0.011500799655914306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,fp8,3,0.012454400211572647
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,bfloat16,7,0.01154400035738945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,fp8,7,0.012443199753761292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,bfloat16,15,0.011598400026559829
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,fp8,15,0.012323199957609176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,bfloat16,31,0.011604800075292587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,fp8,31,0.012382400035858155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,bfloat16,63,0.011591999977827071
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,fp8,63,0.012300799787044524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,bfloat16,127,0.011575999855995178
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,fp8,127,0.012257599830627441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,bfloat16,255,0.013172799348831176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,fp8,255,0.01422400027513504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,bfloat16,511,0.016145600378513335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,fp8,511,0.01805119961500168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,bfloat16,1023,0.015512000024318694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,fp8,1023,0.01690559983253479
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,bfloat16,2047,0.015889599919319153
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,fp8,2047,0.01722240000963211
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,bfloat16,4095,0.01690080016851425
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,fp8,4095,0.017712000012397765
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,bfloat16,8191,0.019094400107860565
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,fp8,8191,0.01887679994106293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,bfloat16,16383,0.022966399788856506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,bfloat16,fp8,16383,0.022540800273418427
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,fp8,1,0.012323199957609176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,bfloat16,3,0.011603199690580369
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,fp8,127,0.012303999811410903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,fp8,3,0.012563200294971466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,bfloat16,7,0.011468800157308579
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,bfloat16,15,0.011443199962377549
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,bfloat16,bfloat16,4095,0.01698399931192398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,fp8,15,0.01223199963569641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,bfloat16,1023,0.10988160371780395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,fp8,31,0.012294399738311767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,bfloat16,31,0.011750400066375732
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,bfloat16,63,0.01157120019197464
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,fp8,63,0.012305600196123123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,fp8,127,0.012353599816560746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,bfloat16,bfloat16,15,0.011425600200891495
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,bfloat16,255,0.013247999548912048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,fp8,255,0.014372800290584565
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,fp8,511,0.01788640022277832
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,bfloat16,511,0.016201600432395935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,bfloat16,1023,0.016097599267959596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,fp8,1023,0.016998399794101716
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,bfloat16,2047,0.016335999965667723
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,fp8,2047,0.01751199960708618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,bfloat16,4095,0.018300800025463103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,fp8,4095,0.018367999792099
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,bfloat16,8191,0.021793599426746368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,bfloat16,fp8,4095,0.22660160064697266
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,fp8,8191,0.021671999990940095
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,bfloat16,16383,0.03792960047721863
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,fp8,16383,0.028409600257873535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,fp8,1,0.012187200039625168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,bfloat16,3,0.011483199894428253
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,fp8,3,0.012227199971675873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,bfloat16,7,0.011499200016260147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,fp8,7,0.012281599640846252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,bfloat16,15,0.011526399850845337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,fp8,15,0.012385600060224534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,bfloat16,31,0.01149120032787323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,fp8,31,0.012219200283288956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,bfloat16,63,0.011638399958610535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,fp8,63,0.012247999757528305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,bfloat16,127,0.011760000139474869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,fp8,127,0.012355200201272964
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,bfloat16,255,0.013143999874591828
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,fp8,255,0.014168000221252442
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,bfloat16,511,0.016166399419307708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,fp8,511,0.018241600692272188
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,bfloat16,1023,0.015667200088500977
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,fp8,1023,0.017052799463272095
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,bfloat16,2047,0.01624159961938858
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,bfloat16,4095,0.016828800737857818
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,fp8,4095,0.017633600533008574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,bfloat16,8191,0.018673600256443025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,fp8,8191,0.018612800538539885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,bfloat16,16383,0.020878399908542632
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,fp8,16383,0.02070080041885376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,fp8,1,0.012273599952459335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,bfloat16,3,0.01154239997267723
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,fp8,3,0.012363199889659882
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,bfloat16,7,0.0117296002805233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,fp8,7,0.01250240057706833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,bfloat16,15,0.011648000031709672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,fp8,15,0.012467200309038163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,bfloat16,31,0.011684799939393998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,bfloat16,1,0.011681599915027619
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,fp8,31,0.012387199699878693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,bfloat16,63,0.011684799939393998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,fp8,63,0.012294399738311767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,bfloat16,127,0.011640000343322753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,fp8,127,0.012319999933242797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,bfloat16,255,0.013167999684810638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,fp8,255,0.0142752006649971
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,fp8,7,0.012305600196123123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,bfloat16,511,0.016120000183582305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,fp8,511,0.018225599825382233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,fp8,1023,0.016804799437522888
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,bfloat16,2047,0.016227200627326965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,fp8,2047,0.01716960072517395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,bfloat16,4095,0.017180800437927246
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,fp8,4095,0.017591999471187593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,bfloat16,8191,0.019387200474739075
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,fp8,8191,0.018947200477123262
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,bfloat16,bfloat16,127,0.011750400066375732
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,bfloat16,16383,0.02327519953250885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,fp8,16383,0.022681599855422972
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,bfloat16,1,0.011740799993276596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,fp8,1,0.012479999661445617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,bfloat16,3,0.011604800075292587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,fp8,3,0.012464000284671784
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,bfloat16,7,0.011662399768829346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,fp8,7,0.01234399974346161
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,bfloat16,15,0.011820799857378005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,fp8,15,0.012415999919176102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,bfloat16,31,0.01154400035738945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,fp8,31,0.012427199631929398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,bfloat16,63,0.01167839989066124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,fp8,63,0.012223999947309494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,bfloat16,127,0.01173119992017746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,fp8,127,0.012441600114107132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,bfloat16,255,0.013121600449085235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,fp8,255,0.014468799531459808
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,bfloat16,511,0.016359999775886536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,fp8,511,0.017876799404621124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,bfloat16,1023,0.01581120043992996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,fp8,1023,0.01706880033016205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,bfloat16,1,0.011535999923944473
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,bfloat16,2047,0.016339200735092162
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,fp8,2047,0.017377600073814392
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,bfloat16,4095,0.018478399515151976
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,bfloat16,8191,0.021806399524211883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,fp8,4095,0.01802240014076233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,fp8,8191,0.021731199324131013
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,bfloat16,16383,0.03882719874382019
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,bfloat16,1,0.01170559972524643
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,fp8,1,0.012707200646400452
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,bfloat16,fp8,16383,0.028203201293945313
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,bfloat16,3,0.011801599711179733
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,fp8,3,0.012414400279521943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,bfloat16,7,0.01186719983816147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,fp8,7,0.012479999661445617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,bfloat16,15,0.011961600184440613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,fp8,15,0.012432000041007996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,bfloat16,31,0.01186399981379509
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,fp8,31,0.012539200484752655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,bfloat16,63,0.011980800330638886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,fp8,63,0.012542399764060973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,bfloat16,127,0.011883199959993363
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,fp8,127,0.0124719999730587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,bfloat16,255,0.013468800485134125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,fp8,255,0.01443679928779602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,bfloat16,511,0.016446399688720702
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,bfloat16,fp8,2047,0.01727679967880249
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,fp8,511,0.018147200345993042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,bfloat16,1023,0.01624159961938858
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,fp8,1023,0.01709440052509308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,bfloat16,2047,0.01791999936103821
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,fp8,2047,0.017712000012397765
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,fp8,4095,0.02147040069103241
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,bfloat16,8191,0.037467199563980105
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,fp8,8191,0.027833598852157592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,bfloat16,16383,0.05913599729537964
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,fp8,16383,0.04254559874534607
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,bfloat16,1,0.012731200456619263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,bfloat16,3,0.012571200728416443
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,fp8,3,0.013284799456596375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,bfloat16,7,0.012590399384498597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,fp8,7,0.013663999736309052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,bfloat16,15,0.012612800300121307
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,fp8,15,0.0133200004696846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,bfloat16,31,0.012598399817943574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,fp8,31,0.0133200004696846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,bfloat16,63,0.01265760064125061
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,fp8,63,0.013542400300502777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,bfloat16,127,0.012732799351215362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,fp8,127,0.013380800187587739
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,bfloat16,255,0.01467359960079193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,fp8,255,0.015374399721622467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,bfloat16,511,0.01759999990463257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,fp8,511,0.01897920072078705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,bfloat16,1023,0.01873600035905838
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,fp8,1023,0.01835840046405792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,bfloat16,2047,0.0204927995800972
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,fp8,2047,0.021958400309085847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,bfloat16,4095,0.03877440094947815
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,fp8,4095,0.028295999765396117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,bfloat16,1023,0.01571040004491806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,bfloat16,8191,0.06038560271263123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,fp8,8191,0.04331200122833252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,bfloat16,1,0.012638400495052337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,bfloat16,16383,0.10495200157165527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,fp8,16383,0.0681119978427887
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,fp8,1,0.013551999628543854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,bfloat16,3,0.01268640011548996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,fp8,3,0.013700799643993377
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,bfloat16,7,0.012676799297332763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,fp8,7,0.013439999520778656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,bfloat16,15,0.012950399518013
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,fp8,15,0.013566400110721587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,bfloat16,31,0.012656000256538392
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,fp8,31,0.013680000603199006
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,bfloat16,63,0.012665599584579468
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,fp8,63,0.013368000090122224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,bfloat16,127,0.012887999415397644
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,fp8,127,0.01356000006198883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,bfloat16,255,0.014537599682807923
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,bfloat16,511,0.017561599612236023
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,fp8,511,0.01895360052585602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,bfloat16,1023,0.020547200739383698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,fp8,1023,0.0215488001704216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,bfloat16,2047,0.03845919966697693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,fp8,2047,0.02815679907798767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,bfloat16,4095,0.06037120223045349
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,fp8,4095,0.042929598689079286
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,fp8,8191,0.06842880249023438
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,bfloat16,16383,0.19100960493087768
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,bfloat16,bfloat16,4095,0.02173440009355545
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,fp8,16383,0.119595205783844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,fp8,1,0.013459199666976928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,bfloat16,bfloat16,1,0.011512000113725662
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,bfloat16,fp8,1,0.013374400138854981
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,fp8,3,0.013447999954223633
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,bfloat16,3,0.012726399302482604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,bfloat16,7,0.012651200592517852
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,bfloat16,15,0.01266240030527115
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,fp8,15,0.0135343998670578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,bfloat16,31,0.01276479959487915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,fp8,31,0.013524800539016724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,bfloat16,63,0.012774400413036346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,fp8,63,0.013572800159454345
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,bfloat16,127,0.012860800325870513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,fp8,127,0.013571199774742127
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,bfloat16,255,0.014689600467681885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,fp8,255,0.015425600111484528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,bfloat16,511,0.01753759980201721
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,fp8,511,0.019070400297641753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,bfloat16,1023,0.03693279922008515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,fp8,1023,0.026423999667167665
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,bfloat16,2047,0.0588208019733429
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,fp8,2047,0.04213280081748962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,bfloat16,4095,0.10266400575637817
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,fp8,4095,0.06616320013999939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,bfloat16,8191,0.1882863998413086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,fp8,8191,0.11832159757614136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,bfloat16,1,0.01646080017089844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,fp8,1,0.017825600504875184
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,bfloat16,16383,0.3604703903198242
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,bfloat16,3,0.01642879992723465
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,fp8,16383,0.21634399890899658
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,bfloat16,7,0.016467200219631196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,fp8,3,0.017951999604701997
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,bfloat16,15,0.01648319959640503
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,fp8,7,0.01780640035867691
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,fp8,15,0.017910400032997133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,bfloat16,31,0.016406400501728056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,fp8,31,0.017871999740600587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,bfloat16,63,0.016571199893951415
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,fp8,63,0.01789119988679886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,bfloat16,127,0.016684800386428833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,fp8,127,0.017958399653434754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,bfloat16,255,0.0203232005238533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,fp8,255,0.021803200244903564
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,fp8,255,0.01576479971408844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,bfloat16,511,0.03668000102043152
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,fp8,511,0.029235199093818665
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,bfloat16,1023,0.06079360246658325
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,fp8,1023,0.045228800177574156
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,bfloat16,2047,0.10430400371551514
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,fp8,2047,0.06970559954643249
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,bfloat16,4095,0.18966879844665527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,fp8,4095,0.11902079582214356
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,bfloat16,bfloat16,8191,0.10468800067901611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,bfloat16,8191,0.3608128070831299
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,fp8,8191,0.21754879951477052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,bfloat16,1,0.011582399904727935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,fp8,1,0.01239679977297783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,bfloat16,3,0.011644800007343293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,fp8,3,0.012379200011491776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,bfloat16,1,0.01282079964876175
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,bfloat16,7,0.011646399646997452
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,fp8,7,0.012425599992275238
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,bfloat16,31,0.011695999652147293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,fp8,15,0.012383999675512314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,fp8,31,0.012476799637079239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,bfloat16,63,0.011628799885511399
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,fp8,63,0.012328000366687774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,bfloat16,127,0.011708799749612808
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,bfloat16,16383,0.7050303936004638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,fp8,127,0.012283200025558471
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,bfloat16,255,0.013255999982357025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,bfloat16,fp8,7,0.013801600039005279
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,fp8,511,0.018174399435520173
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,fp8,1023,0.017156800627708434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,bfloat16,511,0.016444799304008485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,bfloat16,2047,0.016233600676059723
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,fp8,2047,0.01762080043554306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,fp8,4095,0.018001599609851836
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,bfloat16,4095,0.017267200350761413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,fp8,8191,0.018980799615383147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,bfloat16,16383,0.023196800053119658
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,fp8,16383,0.02282879948616028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,bfloat16,1,0.011734399944543839
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,fp8,1,0.012292800098657608
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,bfloat16,3,0.011697600036859513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,fp8,3,0.012275200337171555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,bfloat16,7,0.011744000017642975
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,fp8,7,0.012263999879360199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,bfloat16,15,0.011740799993276596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,fp8,15,0.012299200147390365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,bfloat16,31,0.011760000139474869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,fp8,31,0.012263999879360199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,bfloat16,63,0.011875200271606445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,fp8,63,0.012452799826860428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,bfloat16,127,0.011851199716329575
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,fp8,127,0.012408000230789185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,bfloat16,255,0.013475200533866883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,fp8,255,0.014416000247001648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,bfloat16,511,0.016571199893951415
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,fp8,511,0.01807679980993271
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,bfloat16,1023,0.016047999262809753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,fp8,1023,0.017056000232696534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,bfloat16,2047,0.016571199893951415
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,fp8,2047,0.01717440038919449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,bfloat16,4095,0.01844639927148819
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,fp8,4095,0.01802240014076233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,bfloat16,8191,0.02204640060663223
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,fp8,8191,0.02172800004482269
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,bfloat16,16383,0.03877759873867035
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,bfloat16,fp8,16383,0.02834399938583374
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,bfloat16,1,0.011958400160074234
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,fp8,1,0.01249760016798973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,bfloat16,3,0.011998400092124939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,fp8,3,0.012495999783277511
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,bfloat16,7,0.011988800019025803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,fp8,7,0.012488000094890594
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,bfloat16,15,0.01199679970741272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,fp8,15,0.012510399520397186
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,bfloat16,31,0.011972799897193909
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,fp8,31,0.012510399520397186
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,bfloat16,63,0.01197120025753975
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,fp8,63,0.012484800070524216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,bfloat16,127,0.011846400052309036
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,fp8,127,0.012303999811410903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,bfloat16,255,0.013368000090122224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,fp8,255,0.014577600359916686
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,bfloat16,511,0.016489599645137788
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,fp8,511,0.018300800025463103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,bfloat16,1023,0.016340799629688263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,fp8,1023,0.01695519983768463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,bfloat16,2047,0.018012799322605133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,fp8,2047,0.017467199265956877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,bfloat16,4095,0.02173759937286377
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,bfloat16,15,0.011604800075292587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,fp8,4095,0.021209600567817687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,bfloat16,8191,0.03765600025653839
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,fp8,8191,0.027137601375579835
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,bfloat16,16383,0.059889602661132815
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,bfloat16,fp8,16383,0.04290719926357269
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,bfloat16,fp8,16383,0.4117743968963623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,bfloat16,1,0.01180799975991249
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,fp8,1,0.012945599853992462
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,bfloat16,3,0.011872000247240066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,bfloat16,7,0.011689600348472596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,bfloat16,1023,0.015886400640010834
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,fp8,7,0.012908799946308136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,bfloat16,15,0.011937599629163742
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,fp8,15,0.012556800246238708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,fp8,3,0.012697599828243256
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,bfloat16,31,0.012027200311422348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,fp8,31,0.012636800110340119
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,bfloat16,63,0.01162400022149086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,fp8,63,0.012993599474430084
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,bfloat16,127,0.011998400092124939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,bfloat16,8191,0.0195360004901886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,fp8,127,0.012780800461769104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,bfloat16,255,0.013577599823474885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,fp8,255,0.014739200472831726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,bfloat16,1023,0.017745600640773775
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,fp8,1023,0.017364799976348877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,bfloat16,2047,0.01958400011062622
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,fp8,2047,0.02143999934196472
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,bfloat16,4095,0.037836799025535585
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,fp8,4095,0.02682720124721527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,bfloat16,8191,0.0589680016040802
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,fp8,8191,0.04279040098190308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,bfloat16,16383,0.10204479694366456
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,bfloat16,1,0.015112000703811645
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,fp8,16383,0.06793280243873596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,bfloat16,3,0.015078400075435639
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,fp8,1,0.015835200250148774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,fp8,3,0.01560640037059784
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,bfloat16,7,0.014584000408649444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,fp8,7,0.015904000401496886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,bfloat16,15,0.015228800475597382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,fp8,15,0.015331199765205384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,bfloat16,31,0.015240000188350677
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,fp8,31,0.015907199680805208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,bfloat16,63,0.014979200065135955
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,bfloat16,127,0.015476800501346588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,fp8,127,0.01579679995775223
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,bfloat16,255,0.017057600617408752
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,fp8,255,0.017099200189113616
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,bfloat16,511,0.021091200411319733
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,fp8,511,0.021558399498462676
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,bfloat16,1023,0.038878399133682254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,bfloat16,2047,0.061561602354049685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,fp8,2047,0.04389919936656952
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,bfloat16,4095,0.10463520288467407
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,fp8,4095,0.06911360025405884
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,fp8,8191,0.12005280256271363
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,bfloat16,8191,0.19221919775009155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,bfloat16,1,0.018454399704933167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,fp8,1,0.020230400562286376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,bfloat16,3,0.018427200615406036
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,fp8,3,0.020183999836444855
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,bfloat16,7,0.018592000007629395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,fp8,7,0.020239999890327452
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,bfloat16,15,0.018607999384403228
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,fp8,15,0.020403200387954713
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,bfloat16,31,0.018555200099945067
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,fp8,31,0.020431999862194062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,bfloat16,63,0.018569600582122803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,fp8,63,0.02070239931344986
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,bfloat16,127,0.018505600094795228
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,fp8,127,0.020457600057125092
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,bfloat16,255,0.022356800734996796
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,fp8,255,0.024500800669193266
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,bfloat16,511,0.03943679928779602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,bfloat16,fp8,255,0.014483200013637542
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,fp8,511,0.03247039914131165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,bfloat16,1023,0.06419199705123901
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,fp8,1023,0.048742398619651794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,bfloat16,2047,0.10843679904937745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,fp8,2047,0.0725823998451233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,bfloat16,511,0.016761599481105803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,bfloat16,fp8,511,0.018367999792099
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,fp8,4095,0.12394399642944336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,bfloat16,4095,0.19283679723739625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,bfloat16,1,0.026438400149345398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,fp8,1,0.02924480140209198
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,bfloat16,3,0.026092800498008727
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,fp8,3,0.02932479977607727
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,bfloat16,7,0.026347199082374574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,fp8,7,0.0289247989654541
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,bfloat16,8191,0.36392641067504883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,bfloat16,fp8,8191,0.22306880950927735
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,bfloat16,15,0.026495999097824095
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,fp8,15,0.029393601417541503
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,bfloat16,31,0.025968000292778015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,fp8,63,0.02940320074558258
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,bfloat16,63,0.026051199436187743
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,fp8,127,0.0290367990732193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,fp8,255,0.0372624009847641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,bfloat16,255,0.04461280107498169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,bfloat16,511,0.06559360027313232
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,fp8,63,0.01600960046052933
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,bfloat16,1023,0.10993759632110596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,fp8,1023,0.07995520234107971
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,bfloat16,2047,0.19647040367126464
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,bfloat16,fp8,1023,0.029392001032829285
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,bfloat16,4095,0.3662336111068726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,fp8,4095,0.22569119930267334
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,bfloat16,1,0.04058560132980347
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,fp8,1,0.046489599347114566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,bfloat16,8191,0.7062543869018555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,fp8,8191,0.4264848232269287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,bfloat16,3,0.04068160057067871
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,fp8,3,0.046321600675582886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,bfloat16,7,0.04065600037574768
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,fp8,7,0.04633919894695282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,fp8,15,0.04663040041923523
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,bfloat16,15,0.040759998559951785
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,bfloat16,31,0.04011360108852387
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,fp8,31,0.04652479887008667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,bfloat16,63,0.041443198919296265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,fp8,63,0.0463919997215271
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,bfloat16,127,0.0474592000246048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,fp8,127,0.04812160134315491
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,bfloat16,255,0.07347999811172486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,fp8,255,0.06554080247879028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,bfloat16,511,0.11412960290908813
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,fp8,511,0.09486560225486755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,bfloat16,1023,0.20044000148773194
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,fp8,1023,0.14500319957733154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,bfloat16,2047,0.3714911937713623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,fp8,2047,0.23890559673309325
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,bfloat16,127,0.026631999015808105
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,fp8,511,0.05385439991950989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,bfloat16,1,0.020996800065040587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,bfloat16,4095,0.7071263790130615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,fp8,4095,0.43753600120544434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,fp8,1,0.022579200565814972
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,fp8,2047,0.1281551957130432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,bfloat16,7,0.020824000239372253
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,bfloat16,3,0.021382400393486024
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,fp8,3,0.02232159972190857
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,fp8,7,0.02300799936056137
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,bfloat16,15,0.02096959948539734
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,fp8,15,0.022387200593948366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,fp8,31,0.022974400222301482
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,bfloat16,63,0.021211199462413788
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,fp8,63,0.02253279983997345
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,bfloat16,127,0.021385599672794343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,fp8,127,0.022694399952888487
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,bfloat16,255,0.02459840029478073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,fp8,255,0.02709600031375885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,bfloat16,511,0.043300798535346983
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,fp8,511,0.034001600742340085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,bfloat16,1023,0.06681920289993286
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,fp8,8191,0.8483504295349121
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,bfloat16,2047,0.11013599634170532
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,fp8,2047,0.07523840069770812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,bfloat16,bfloat16,8191,1.3894720077514648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,fp8,1,0.031204798817634584
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,bfloat16,3,0.028479999303817748
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,bfloat16,1,0.028814399242401124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,bfloat16,7,0.028169599175453187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,fp8,7,0.031488001346588135
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,fp8,3,0.033395200967788696
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,fp8,15,0.031123200058937074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,bfloat16,31,0.028276801109313965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,bfloat16,63,0.028233599662780762
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,fp8,63,0.03143840134143829
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,bfloat16,127,0.029728001356124877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,fp8,127,0.031523200869560244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,bfloat16,255,0.04772160053253174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,fp8,255,0.040110400319099425
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,bfloat16,511,0.06890079975128174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,fp8,511,0.05644800066947937
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,fp8,1023,0.08349279761314392
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,bfloat16,1023,0.11482239961624145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,fp8,2047,0.13112800121307372
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,bfloat16,2047,0.19925600290298462
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,bfloat16,1,0.042894399166107176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,fp8,1,0.049572798609733584
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,bfloat16,fp8,31,0.028905600309371948
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,bfloat16,3,0.04268800020217896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,fp8,3,0.04958719909191132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,fp8,7,0.04868319928646088
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,bfloat16,7,0.04327360093593598
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,bfloat16,15,0.04350880086421967
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,fp8,15,0.04862399995326996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,bfloat16,31,0.043017598986625674
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,fp8,31,0.04975999891757965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,bfloat16,63,0.043689599633216857
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,fp8,63,0.04922879934310913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,bfloat16,127,0.050488001108169554
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,fp8,127,0.05061119794845581
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,bfloat16,255,0.07443519830703735
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,fp8,255,0.06871520280838013
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,bfloat16,511,0.11727360486984253
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,fp8,511,0.09736160039901734
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,bfloat16,31,0.020814399421215057
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,bfloat16,1023,0.20304639339447023
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,fp8,1023,0.14648159742355346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,bfloat16,1,0.07209600210189819
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,fp8,1,0.083542400598526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,bfloat16,3,0.07215999960899352
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,bfloat16,2047,0.3719327926635742
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,bfloat16,fp8,2047,0.24369120597839355
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,bfloat16,fp8,1023,0.051076799631118774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,fp8,3,0.08359360098838806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,bfloat16,7,0.07221599817276
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,bfloat16,15,0.07086719870567322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,bfloat16,31,0.07147039771080017
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,fp8,7,0.08367679715156555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,fp8,31,0.08376320004463196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,fp8,63,0.08681920170783997
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,bfloat16,127,0.0819263994693756
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,fp8,127,0.09523839950561523
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,bfloat16,15,0.028747200965881348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,fp8,255,0.11866879463195801
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,bfloat16,fp8,31,0.03374559879302978
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,bfloat16,511,0.21565759181976318
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,fp8,511,0.18215839862823485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,bfloat16,1023,0.3749711990356445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,fp8,1023,0.28231201171875
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,bfloat16,1,0.03392800092697144
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,fp8,1,0.03750559985637665
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,bfloat16,3,0.0341264009475708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,fp8,3,0.037567999958992
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,bfloat16,7,0.034129598736763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,fp8,2047,0.4778223991394043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,bfloat16,2047,0.7159679889678955
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,bfloat16,15,0.03418079912662506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,fp8,31,0.0370608001947403
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,bfloat16,31,0.0347680002450943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,fp8,63,0.037785598635673524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,bfloat16,127,0.03653120100498199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,fp8,127,0.03777439892292023
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,bfloat16,255,0.05355679988861084
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,fp8,255,0.04668160080909729
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,bfloat16,511,0.07527359724044799
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,fp8,511,0.06369600296020508
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,bfloat16,1023,0.12218400239944457
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,fp8,1023,0.09109920263290405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,bfloat16,63,0.07787520289421082
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,fp8,15,0.08526560068130493
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,bfloat16,1,0.04938240051269531
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,fp8,1,0.05416640043258667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,fp8,3,0.055131202936172484
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,bfloat16,7,0.04829759895801544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,fp8,7,0.05522720217704773
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,bfloat16,bfloat16,255,0.13038400411605836
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,bfloat16,15,0.0482367992401123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,fp8,15,0.055255997180938723
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,bfloat16,31,0.04878720045089722
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,fp8,31,0.05413920283317566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,bfloat16,63,0.05070880055427551
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,fp8,63,0.05457599759101868
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,bfloat16,127,0.05766080021858215
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,fp8,127,0.05746080279350281
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,bfloat16,255,0.08214399814605713
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,fp8,255,0.07239999771118164
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,bfloat16,511,0.12311840057373047
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,fp8,511,0.10326559543609619
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,bfloat16,1023,0.21037759780883789
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,bfloat16,1,0.07731840014457703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,fp8,1023,0.15678880214691163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,fp8,1,0.08879520297050476
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,bfloat16,3,0.07671040296554565
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,bfloat16,7,0.07823839783668518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,fp8,7,0.037662398815155027
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,fp8,7,0.08879839777946472
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,fp8,15,0.09105600118637085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,bfloat16,31,0.07742720246315002
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,bfloat16,63,0.034176000952720643
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,fp8,31,0.08895519971847535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,bfloat16,63,0.08276159763336181
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,fp8,63,0.09279680252075195
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,bfloat16,127,0.08577759861946106
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,fp8,127,0.09781119823455811
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,bfloat16,255,0.12777600288391114
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,fp8,255,0.1250607967376709
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,bfloat16,511,0.2162480115890503
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,fp8,511,0.18165760040283202
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,bfloat16,1,0.12959359884262084
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,bfloat16,1023,0.3810895919799805
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,fp8,1,0.15630240440368653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,bfloat16,3,0.12957439422607422
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,bfloat16,7,0.12974400520324708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,fp8,3,0.15872479677200318
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,fp8,7,0.1562656044960022
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,bfloat16,15,0.1300320029258728
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,bfloat16,31,0.13711520433425903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,fp8,31,0.15630559921264647
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,bfloat16,63,0.14022560119628907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,bfloat16,bfloat16,3,0.0490447998046875
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,fp8,63,0.1666192054748535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,bfloat16,127,0.1485919952392578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,fp8,127,0.17977759838104249
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,bfloat16,255,0.2294895887374878
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,fp8,255,0.22221920490264893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,bfloat16,511,0.4173327922821045
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,fp8,511,0.34033119678497314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,bfloat16,1,0.011547199636697768
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,fp8,1,0.012254399806261062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,bfloat16,3,0.011552000045776367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,fp8,3,0.08952159881591797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,fp8,3,0.012342400103807449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,bfloat16,7,0.01183520033955574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,bfloat16,1023,0.7266719818115235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,fp8,7,0.0123648002743721
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,bfloat16,15,0.07636799812316894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,bfloat16,fp8,15,0.03779839873313904
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,bfloat16,31,0.011582399904727935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,fp8,15,0.012860800325870513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,bfloat16,63,0.011585599929094314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,fp8,1023,0.5511583805084228
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,bfloat16,127,0.011555200070142746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,fp8,63,0.012516799569129943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,bfloat16,255,0.013148799538612366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,fp8,127,0.012731200456619263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,bfloat16,511,0.016177600622177123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,fp8,511,0.018241600692272188
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,bfloat16,1023,0.015982399880886077
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,fp8,1023,0.017318400740623473
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,bfloat16,2047,0.01668799966573715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,fp8,2047,0.017657600343227386
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,bfloat16,4095,0.018454399704933167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,fp8,4095,0.01836480051279068
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,bfloat16,8191,0.02240640074014664
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,fp8,8191,0.022040000557899474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,bfloat16,fp8,1023,0.2849839925765991
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,bfloat16,16383,0.0392304003238678
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,fp8,16383,0.028571200370788575
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,fp8,1,0.012390399724245072
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,bfloat16,1,0.011959999799728394
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,fp8,3,0.012361600250005721
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,bfloat16,7,0.01183520033955574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,fp8,7,0.012361600250005721
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,bfloat16,15,0.011828800290822982
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,fp8,15,0.012494400143623352
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,fp8,31,0.012483199685811996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,bfloat16,fp8,15,0.15884319543838502
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,bfloat16,63,0.011748799681663513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,fp8,63,0.012665599584579468
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,bfloat16,127,0.011811199784278869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,fp8,127,0.012863999605178833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,bfloat16,255,0.01340160071849823
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,fp8,255,0.014788800477981567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,bfloat16,511,0.01645279973745346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,fp8,511,0.018456000089645385
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,bfloat16,1023,0.016051200032234193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,fp8,1023,0.017351999878883362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,bfloat16,2047,0.01789119988679886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,fp8,2047,0.017951999604701997
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,bfloat16,4095,0.02125599980354309
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,fp8,4095,0.021536000072956085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,bfloat16,8191,0.03826079964637756
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,fp8,8191,0.02776640057563782
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,bfloat16,16383,0.0602400004863739
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,fp8,16383,0.04232159852981567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,bfloat16,1,0.011988800019025803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,fp8,1,0.012865599989891053
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,bfloat16,3,0.011847999691963196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,fp8,3,0.012992000579833985
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,bfloat16,7,0.01183359995484352
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,fp8,7,0.012910400331020356
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,bfloat16,15,0.011913599818944931
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,fp8,15,0.012899200618267059
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,bfloat16,31,0.011844799667596818
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,fp8,31,0.01290079951286316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,bfloat16,63,0.011963199824094772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,fp8,63,0.013208000361919403
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,bfloat16,127,0.012193600088357926
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,fp8,127,0.012921600043773651
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,bfloat16,15,0.011832000315189361
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,bfloat16,255,0.013868799805641175
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,fp8,255,0.015014399588108063
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,fp8,511,0.018279999494552612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,bfloat16,1023,0.01793919950723648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,fp8,31,0.012544000148773193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,fp8,1023,0.017377600073814392
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,bfloat16,2047,0.020095999538898467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,fp8,2047,0.021329599618911742
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,bfloat16,4095,0.037939199805259706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,fp8,4095,0.02813279926776886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,bfloat16,fp8,255,0.014584000408649444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,bfloat16,8191,0.05961120128631592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,fp8,8191,0.04146240055561066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,fp8,16383,0.06667519807815551
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,bfloat16,1,0.01218400001525879
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,fp8,1,0.012921600043773651
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,fp8,3,0.012964800000190735
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,bfloat16,7,0.012185599654912949
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,fp8,7,0.01297439932823181
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,bfloat16,15,0.012148799747228623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,fp8,15,0.01292479932308197
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,bfloat16,31,0.01218400001525879
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,fp8,31,0.012972800433635712
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,bfloat16,63,0.012247999757528305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,fp8,63,0.012972800433635712
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,bfloat16,127,0.012379200011491776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,bfloat16,3,0.012100800126791
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,fp8,127,0.012916800379753113
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,bfloat16,255,0.01401440054178238
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,fp8,255,0.01488959938287735
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,fp8,511,0.01845120042562485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,bfloat16,1023,0.01997919976711273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,fp8,1023,0.02096160054206848
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,bfloat16,bfloat16,31,0.011897599697113037
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,fp8,2047,0.02675360143184662
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,bfloat16,4095,0.05906559824943543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,fp8,4095,0.04168640077114105
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,bfloat16,8191,0.1023360013961792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,fp8,8191,0.0665328025817871
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,bfloat16,16383,0.18876479864120482
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,fp8,16383,0.11691039800643921
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,bfloat16,1,0.05495679974555969
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,bfloat16,3,0.055067199468612674
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,fp8,3,0.061153602600097653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,bfloat16,7,0.055159997940063474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,fp8,7,0.061217600107192995
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,bfloat16,511,0.01677280068397522
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,bfloat16,15,0.055379199981689456
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,fp8,15,0.06065760254859924
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,bfloat16,31,0.055238401889801024
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,fp8,31,0.06050400137901306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,bfloat16,63,0.05884000062942505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,fp8,63,0.06078400015830994
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,bfloat16,127,0.06416800022125244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,bfloat16,bfloat16,16383,0.10319679975509644
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,bfloat16,3,0.012177599966526032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,fp8,127,0.0643775999546051
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,bfloat16,255,0.08565919995307922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,fp8,255,0.07897760272026062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,bfloat16,511,0.12761280536651612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,bfloat16,1,0.08362240195274354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,fp8,511,0.11020159721374512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,fp8,1,0.09450079798698426
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,fp8,3,0.09506880044937134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,bfloat16,7,0.08283680081367492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,fp8,7,0.09465759992599487
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,bfloat16,511,0.017023999989032746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,bfloat16,15,0.08342559933662415
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,bfloat16,31,0.0842736005783081
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,fp8,15,0.09439200162887573
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,fp8,31,0.09476959705352783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,bfloat16,63,0.0888592004776001
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,bfloat16,bfloat16,2047,0.03780319988727569
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,bfloat16,127,0.09446079730987549
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,fp8,127,0.1029360055923462
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,bfloat16,255,0.13763200044631957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,fp8,255,0.1290992021560669
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,bfloat16,511,0.21960480213165284
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,bfloat16,1,0.138755202293396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,fp8,511,0.18979840278625487
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,fp8,1,0.16244319677352906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,bfloat16,3,0.13787039518356323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,fp8,3,0.16238720417022706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,bfloat16,7,0.13751039505004883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,fp8,7,0.16268160343170165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,bfloat16,15,0.13757120370864867
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,bfloat16,31,0.1419327974319458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,fp8,15,0.16072959899902345
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,fp8,31,0.16557600498199462
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,bfloat16,63,0.1455008029937744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,fp8,63,0.17131199836730956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,bfloat16,fp8,1,0.06055840253829956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,fp8,127,0.17812319993972778
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,bfloat16,255,0.2246448040008545
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,fp8,255,0.22738559246063234
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,bfloat16,511,0.40309920310974123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,bfloat16,1,0.2410367965698242
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,fp8,511,0.3472368001937866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,fp8,1,0.29083681106567383
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,fp8,3,0.29064478874206545
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,bfloat16,3,0.24223840236663818
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,bfloat16,7,0.24360959529876708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,fp8,7,0.2918704032897949
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,bfloat16,15,0.24770081043243408
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,fp8,15,0.29148159027099607
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,bfloat16,31,0.2593679904937744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,fp8,31,0.31228640079498293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,bfloat16,63,0.25930559635162354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,fp8,63,0.31313600540161135
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,bfloat16,127,0.2782975912094116
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,bfloat16,3,0.08257279992103576
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,fp8,127,0.3256095886230469
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,bfloat16,fp8,63,0.0989087998867035
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,bfloat16,1,0.09402880072593689
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,fp8,1,0.10742080211639404
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,bfloat16,3,0.09482079744338989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,fp8,3,0.1061568021774292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,bfloat16,7,0.09542239904403686
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,fp8,7,0.10713759660720826
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,bfloat16,15,0.09425119757652282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,fp8,15,0.10758399963378906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,bfloat16,31,0.09534400105476379
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,fp8,511,0.6602352142333985
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,fp8,31,0.1049232006072998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,bfloat16,511,0.8129679679870605
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,bfloat16,63,0.10346399545669556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,fp8,63,0.1136944055557251
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,fp8,127,0.11382880210876464
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,bfloat16,127,0.1097216010093689
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,bfloat16,1,0.1491487979888916
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,fp8,255,0.14193919897079468
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,fp8,1,0.1690608024597168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,bfloat16,3,0.14930399656295776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,fp8,3,0.17224960327148436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,bfloat16,bfloat16,127,0.154094398021698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,fp8,7,0.16922080516815186
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,fp8,15,0.17294880151748657
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,bfloat16,31,0.15309760570526124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,fp8,31,0.1736863970756531
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,bfloat16,63,0.15786399841308593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,fp8,63,0.18211840391159057
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,bfloat16,127,0.16708159446716309
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,fp8,127,0.18610399961471558
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,bfloat16,255,0.24221279621124267
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,fp8,255,0.2363584041595459
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,bfloat16,1,0.258076810836792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,fp8,1,0.30411839485168457
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,bfloat16,3,0.2586479902267456
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,bfloat16,7,0.25815520286560056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,fp8,3,0.30427520275115966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,bfloat16,255,0.42735681533813474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,bfloat16,fp8,255,0.42042717933654783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,bfloat16,15,0.2617583990097046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,fp8,7,0.3048543930053711
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,fp8,15,0.3062128067016602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,bfloat16,31,0.2649712085723877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,bfloat16,63,0.26960480213165283
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,fp8,31,0.3204272031784058
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,fp8,63,0.3235136032104492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,bfloat16,127,0.286846399307251
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,bfloat16,bfloat16,255,0.15436960458755494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,fp8,127,0.32832319736480714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,bfloat16,7,0.14952640533447265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,bfloat16,255,0.42547039985656737
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,bfloat16,bfloat16,15,0.14979840517044068
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,bfloat16,fp8,255,0.4284815788269043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,bfloat16,1,0.4726880073547363
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,fp8,1,0.559822416305542
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,bfloat16,3,0.4730463981628418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,fp8,3,0.5652927875518798
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,bfloat16,7,0.48345441818237306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,fp8,7,0.5606895923614502
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,bfloat16,15,0.4878223896026611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,fp8,15,0.5864687919616699
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,bfloat16,31,0.49266881942749025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,fp8,31,0.5965151786804199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,bfloat16,63,0.5025712013244629
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,bfloat16,1,0.011470399796962738
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,bfloat16,3,0.011566399782896041
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,fp8,63,0.6001408100128174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,bfloat16,127,0.5294688224792481
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,fp8,3,0.012265600264072418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,bfloat16,7,0.011774399876594543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,bfloat16,15,0.011555200070142746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,fp8,127,0.6167024135589599
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,bfloat16,31,0.01149120032787323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,bfloat16,63,0.011478400230407715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,fp8,31,0.01239359974861145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,fp8,63,0.012203200161457062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,bfloat16,127,0.011905600130558015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,fp8,127,0.012216000258922577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,bfloat16,255,0.013524800539016724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,fp8,255,0.8182831764221191
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,fp8,255,0.014177599549293518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,bfloat16,511,0.01647839993238449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,bfloat16,1023,0.01584320068359375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,fp8,1023,0.01695999950170517
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,fp8,511,0.018244799971580506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,bfloat16,2047,0.01769919991493225
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,bfloat16,4095,0.021270400285720824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,fp8,4095,0.02088640034198761
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,bfloat16,8191,0.038334399461746216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,fp8,8191,0.026895999908447266
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,fp8,16383,0.04210399985313416
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,bfloat16,1,0.011628799885511399
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,fp8,1,0.012308800220489502
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,bfloat16,3,0.011596799641847611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,fp8,3,0.0123648002743721
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,bfloat16,7,0.01162559986114502
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,fp8,7,0.012441600114107132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,bfloat16,15,0.01173280030488968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,fp8,15,0.0124208003282547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,bfloat16,31,0.011670400202274323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,fp8,31,0.01239359974861145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,bfloat16,63,0.011675199866294861
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,fp8,63,0.01290079951286316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,bfloat16,127,0.011695999652147293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,fp8,127,0.012796799838542938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,bfloat16,255,0.013358399271965027
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,fp8,255,0.014636799693107605
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,fp8,511,0.018276800215244294
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,bfloat16,1023,0.017788800597190856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,fp8,1023,0.017339199781417847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,bfloat16,2047,0.02032800018787384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,fp8,2047,0.020982399582862854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,fp8,1,0.01286720037460327
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,bfloat16,4095,0.038145598769187924
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,fp8,4095,0.027233600616455078
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,fp8,7,0.01281919926404953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,bfloat16,8191,0.05964319705963135
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,fp8,8191,0.04232319891452789
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,fp8,16383,0.06848639845848084
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,bfloat16,bfloat16,255,0.7975840091705322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,fp8,1,0.012857599556446076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,fp8,3,0.01281599998474121
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,bfloat16,3,0.011841599643230439
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,bfloat16,7,0.012035199999809265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,fp8,7,0.01279360055923462
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,fp8,2047,0.017360000312328337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,bfloat16,15,0.01210559979081154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,bfloat16,31,0.012414400279521943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,fp8,15,0.012748800218105316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,fp8,31,0.012987199425697326
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,bfloat16,63,0.011964800208806992
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,fp8,63,0.012910400331020356
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,fp8,127,0.012969599664211273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,bfloat16,16383,0.06033920049667359
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,bfloat16,255,0.013971200585365296
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,fp8,255,0.015056000649929046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,bfloat16,511,0.017059199512004852
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,fp8,511,0.018595199286937713
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,bfloat16,1023,0.019649599492549897
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,fp8,1023,0.020926399528980254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,bfloat16,2047,0.037668800354003905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,fp8,2047,0.027188798785209654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,bfloat16,4095,0.06054880023002625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,fp8,4095,0.042788800597190854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,fp8,8191,0.06703360080718994
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,bfloat16,16383,0.18928639888763427
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,bfloat16,511,0.016371199488639833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,fp8,16383,0.11648160219192505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,bfloat16,1,0.01196800023317337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,fp8,1,0.013225600123405457
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,bfloat16,3,0.012044800072908401
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,fp8,3,0.013193599879741669
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,bfloat16,7,0.011992000043392181
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,fp8,7,0.01321759968996048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,bfloat16,15,0.011998400092124939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,fp8,15,0.013246400654315949
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,bfloat16,31,0.012054400146007537
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,fp8,31,0.01324319988489151
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,bfloat16,63,0.012080000340938568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,fp8,63,0.013214400410652161
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,bfloat16,127,0.012139199674129486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,fp8,127,0.013236799836158752
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,bfloat16,255,0.013897599279880523
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,fp8,255,0.015113599598407745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,bfloat16,511,0.01721920073032379
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,fp8,511,0.018651199340820313
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,bfloat16,1023,0.03609760105609894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,fp8,1023,0.026395198702812196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,bfloat16,bfloat16,16383,0.10343680381774903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,bfloat16,2047,0.057785600423812866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,fp8,2047,0.04179840087890625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,bfloat16,4095,0.10156960487365722
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,fp8,4095,0.06592959761619568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,bfloat16,8191,0.18747999668121337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,fp8,8191,0.11631360054016113
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,bfloat16,1,0.013302400708198547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,fp8,1,0.014159999787807465
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,bfloat16,127,0.01210559979081154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,bfloat16,3,0.013278399407863618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,bfloat16,16383,0.36054561138153074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,fp8,3,0.014166399836540222
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,bfloat16,7,0.01324480026960373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,bfloat16,fp8,16383,0.2122096061706543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,bfloat16,15,0.013505600392818451
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,fp8,15,0.014316800236701965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,fp8,7,0.013915200531482697
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,bfloat16,31,0.013675199449062347
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,fp8,31,0.014203199744224548
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,bfloat16,63,0.01329279989004135
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,fp8,63,0.013956800103187561
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,fp8,127,0.014060799777507783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,bfloat16,255,0.0150736004114151
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,fp8,255,0.015755200386047365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,bfloat16,511,0.018016000092029572
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,fp8,511,0.019539199769496918
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,bfloat16,1023,0.02136639952659607
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,fp8,1023,0.02206239998340607
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,bfloat16,2047,0.03897280097007751
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,fp8,2047,0.027907198667526244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,bfloat16,4095,0.061166399717330934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,fp8,4095,0.04408159852027893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,bfloat16,8191,0.10374560356140136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,bfloat16,8191,0.10437439680099488
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,fp8,8191,0.0680288016796112
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,bfloat16,16383,0.1921712040901184
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,fp8,16383,0.12120000123977662
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,bfloat16,1,0.013665600121021271
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,fp8,1,0.014481599628925323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,fp8,32767,0.21776480674743653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,bfloat16,3,0.013470399379730224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,fp8,3,0.014228799939155578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,bfloat16,32767,0.36583681106567384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,fp8,7,0.014260800182819366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,bfloat16,15,0.013352000713348388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,bfloat16,7,0.013400000333786011
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,fp8,31,0.014257599413394929
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,fp8,15,0.014126400649547576
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,bfloat16,63,0.013518400490283966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,fp8,63,0.014312000572681427
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,fp8,127,0.014336000382900237
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,bfloat16,127,0.013473600149154663
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,bfloat16,255,0.015356799960136414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,fp8,255,0.01634719967842102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,bfloat16,511,0.018910400569438934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,fp8,511,0.01982080042362213
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,bfloat16,1023,0.037615999579429626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,fp8,1023,0.027345600724220275
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,bfloat16,2047,0.05937759876251221
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,fp8,2047,0.04199999868869782
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,bfloat16,4095,0.10341759920120239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,fp8,4095,0.06635040044784546
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,bfloat16,bfloat16,1,0.012083200365304947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,bfloat16,8191,0.18962559700012208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,fp8,8191,0.11755839586257935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,fp8,16383,0.2173072099685669
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,bfloat16,16383,0.3634479999542236
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,bfloat16,1,0.017161600291728973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,fp8,1,0.01820479929447174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,bfloat16,3,0.017081600427627564
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,fp8,3,0.018171200156211854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,bfloat16,7,0.017161600291728973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,fp8,32767,0.4140655994415283
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,fp8,7,0.01831679940223694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,bfloat16,15,0.01720000058412552
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,bfloat16,31,0.01701440066099167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,bfloat16,32767,0.7083792209625244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,fp8,31,0.01865600049495697
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,fp8,15,0.018555200099945067
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,bfloat16,63,0.01700959950685501
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,bfloat16,bfloat16,127,0.0135343998670578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,fp8,127,0.01852000057697296
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,bfloat16,255,0.020764799416065217
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,fp8,255,0.023375999927520753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,bfloat16,511,0.03741759955883026
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,fp8,511,0.02970399856567383
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,bfloat16,1023,0.06240000128746033
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,fp8,1023,0.045793598890304564
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,bfloat16,2047,0.10565119981765747
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,fp8,2047,0.07114560008049012
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,bfloat16,4095,0.19059360027313232
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,fp8,4095,0.11883200407028198
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,bfloat16,8191,0.36183838844299315
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,fp8,8191,0.22117760181427001
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,bfloat16,bfloat16,31,0.013414399325847625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,bfloat16,16383,0.7067168235778809
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,fp8,16383,0.41742238998413084
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,bfloat16,1,0.011204800009727478
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,bfloat16,fp8,15,0.012567999958992004
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,bfloat16,3,0.011219199746847153
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,fp8,1,0.012510399520397186
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,bfloat16,7,0.011315199732780456
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,fp8,3,0.012222400307655335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,bfloat16,15,0.011296000331640244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,fp8,15,0.012225600332021714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,bfloat16,31,0.011287999898195266
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,fp8,31,0.012399999797344208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,bfloat16,63,0.011296000331640244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,fp8,63,0.012272000312805176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,bfloat16,127,0.011351999640464783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,fp8,127,0.01239359974861145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,bfloat16,255,0.012856000661849975
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,fp8,32767,0.8060288429260254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,bfloat16,511,0.015831999480724335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,bfloat16,1023,0.015067200362682342
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,fp8,511,0.01823199987411499
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,bfloat16,32767,1.397163200378418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,fp8,1023,0.01623679995536804
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,bfloat16,4095,0.0164015993475914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,bfloat16,2047,0.01584800034761429
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,fp8,2047,0.01709440052509308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,fp8,4095,0.017207999527454377
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,bfloat16,8191,0.017790399491786957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,bfloat16,16383,0.02032800018787384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,fp8,16383,0.02061759978532791
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,bfloat16,32767,0.022673599421977997
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,fp8,8191,0.019043199717998505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,bfloat16,127,0.01724960058927536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,fp8,32767,0.024483199417591094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,bfloat16,fp8,63,0.018572799861431122
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,bfloat16,1,0.01130400002002716
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,fp8,1,0.012415999919176102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,fp8,7,0.012083200365304947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,bfloat16,7,0.011505600064992905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,bfloat16,15,0.011612799763679505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,fp8,15,0.01210239976644516
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,bfloat16,31,0.011617600172758102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,fp8,31,0.01239359974861145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,bfloat16,63,0.011376000195741653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,fp8,63,0.01233920007944107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,bfloat16,127,0.011640000343322753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,fp8,127,0.012092799693346024
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,fp8,255,0.014979200065135955
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,bfloat16,511,0.015945599973201753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,fp8,511,0.01871519982814789
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,bfloat16,1023,0.015219199657440185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,fp8,1023,0.01703200042247772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,bfloat16,2047,0.015688000619411467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,fp8,2047,0.01733279973268509
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,bfloat16,4095,0.01640480011701584
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,fp8,4095,0.017846399545669557
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,bfloat16,8191,0.01777919977903366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,fp8,8191,0.01852319985628128
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,bfloat16,16383,0.021033599972724915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,fp8,16383,0.02077919989824295
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,bfloat16,32767,0.024340799450874327
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,fp8,32767,0.02438880056142807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,bfloat16,1,0.011315199732780456
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,fp8,1,0.012129600346088409
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,bfloat16,3,0.011342400312423706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,fp8,3,0.012095999717712403
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,bfloat16,7,0.011385600268840789
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,fp8,7,0.01218079999089241
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,bfloat16,15,0.011419200152158738
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,fp8,15,0.012249600142240524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,fp8,31,0.012091200053691863
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,bfloat16,63,0.01143999993801117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,fp8,63,0.012116800248622894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,bfloat16,127,0.011403200030326844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,fp8,127,0.012113600224256515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,fp8,7,0.012664000689983367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,bfloat16,255,0.013064000010490417
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,bfloat16,511,0.016143999993801117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,fp8,511,0.018083199858665466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,bfloat16,1023,0.015697599947452547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,fp8,1023,0.01672320067882538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,bfloat16,fp8,255,0.014287999272346497
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,bfloat16,2047,0.01589920073747635
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,fp8,2047,0.017287999391555786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,bfloat16,4095,0.01672320067882538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,fp8,4095,0.017555199563503265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,bfloat16,8191,0.01881919950246811
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,fp8,8191,0.01902559995651245
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,bfloat16,16383,0.023310400545597076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,fp8,16383,0.022752000391483305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,bfloat16,32767,0.03958880007266998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,fp8,32767,0.03107680082321167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,bfloat16,1,0.011462400108575821
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,bfloat16,3,0.011484800279140473
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,fp8,3,0.012240000069141388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,bfloat16,7,0.01143999993801117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,fp8,7,0.012064000219106674
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,bfloat16,15,0.011392000317573547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,fp8,15,0.012121599912643433
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,bfloat16,31,0.011291199922561645
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,bfloat16,255,0.013185599446296692
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,fp8,31,0.01210559979081154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,fp8,63,0.012135999649763108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,bfloat16,63,0.011692799627780914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,bfloat16,127,0.011472000181674958
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,fp8,127,0.01244800016283989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,bfloat16,255,0.01305440068244934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,fp8,255,0.014361600577831268
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,bfloat16,511,0.016059200465679168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,fp8,511,0.018167999386787415
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,bfloat16,1023,0.015707199275493623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,fp8,1023,0.016835199296474458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,bfloat16,2047,0.016046400368213653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,fp8,2047,0.017022399604320525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,bfloat16,4095,0.01621599942445755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,fp8,4095,0.017671999335289002
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,bfloat16,8191,0.018111999332904815
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,fp8,8191,0.019089600443840025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,bfloat16,16383,0.02048960030078888
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,fp8,16383,0.021491199731826782
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,bfloat16,32767,0.02470880001783371
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,fp8,32767,0.02526719868183136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,bfloat16,1,0.011432000249624253
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,bfloat16,31,0.011264000087976456
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,fp8,1,0.012353599816560746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,bfloat16,3,0.011475200206041336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,fp8,3,0.012214399874210358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,bfloat16,7,0.01165279969573021
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,fp8,7,0.012247999757528305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,bfloat16,15,0.011497599631547928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,fp8,15,0.012324800342321396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,fp8,31,0.01228799968957901
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,bfloat16,63,0.011633600294589996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,fp8,63,0.012267199903726577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,bfloat16,127,0.011635199934244157
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,fp8,127,0.012319999933242797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,bfloat16,255,0.013167999684810638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,fp8,255,0.014337599277496338
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,bfloat16,511,0.016209599375724793
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,bfloat16,fp8,255,0.014528000354766845
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,fp8,511,0.018187199532985688
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,bfloat16,1023,0.015719999372959138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,fp8,1023,0.016993600130081176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,bfloat16,2047,0.0160863995552063
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,fp8,2047,0.01717280000448227
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,bfloat16,4095,0.017127999663352968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,fp8,4095,0.01786399930715561
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,bfloat16,8191,0.01939679980278015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,fp8,8191,0.0189968004822731
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,bfloat16,16383,0.02282560020685196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,fp8,16383,0.022972799837589264
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,bfloat16,32767,0.039401599764823915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,bfloat16,1,0.011574400216341018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,fp8,32767,0.028960001468658448
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,fp8,1,0.012406399846076966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,bfloat16,3,0.011532799899578094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,fp8,3,0.012326399981975555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,bfloat16,7,0.011721599847078323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,fp8,7,0.012305600196123123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,bfloat16,15,0.011515200138092041
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,fp8,15,0.012387199699878693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,bfloat16,31,0.011556799709796905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,fp8,31,0.012451200187206269
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,bfloat16,63,0.011643200367689132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,fp8,63,0.012558400630950928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,bfloat16,127,0.011582399904727935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,bfloat16,fp8,1,0.01239520013332367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,fp8,127,0.012494400143623352
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,bfloat16,255,0.013169600069522858
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,fp8,255,0.014433600008487701
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,bfloat16,511,0.0162992000579834
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,bfloat16,1023,0.016036799550056456
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,bfloat16,3,0.011481600254774094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,bfloat16,2047,0.01656160056591034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,fp8,2047,0.017263999581336974
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,bfloat16,4095,0.018318399786949158
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,fp8,4095,0.017854399979114532
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,bfloat16,8191,0.022123199701309205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,fp8,8191,0.021347199380397797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,bfloat16,16383,0.03812159895896912
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,fp8,16383,0.027995198965072632
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,bfloat16,32767,0.060971200466156006
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,bfloat16,1,0.012561599910259246
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,fp8,1,0.013327999413013459
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,fp8,3,0.013300800323486328
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,bfloat16,7,0.012492799758911132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,fp8,7,0.013257600367069244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,bfloat16,15,0.01255359947681427
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,fp8,15,0.013233600556850434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,bfloat16,31,0.012566399574279786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,fp8,31,0.013312000036239623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,bfloat16,63,0.01263200044631958
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,fp8,63,0.01337919980287552
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,bfloat16,127,0.012630400061607362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,fp8,127,0.013363200426101684
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,bfloat16,bfloat16,31,0.011508800089359283
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,bfloat16,255,0.014412799477577209
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,fp8,255,0.015455999970436096
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,bfloat16,511,0.01722559928894043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,fp8,511,0.018862399458885192
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,bfloat16,1023,0.01834080070257187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,fp8,1023,0.018049600720405578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,bfloat16,2047,0.020227199792861937
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,fp8,2047,0.021777600049972534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,bfloat16,4095,0.038790398836135866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,fp8,4095,0.027852800488471986
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,bfloat16,8191,0.06035360097885132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,fp8,8191,0.04338879883289337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,fp8,16383,0.06872959733009339
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,bfloat16,32767,0.191048002243042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,fp8,32767,0.11990560293197632
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,fp8,511,0.017950400710105896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,bfloat16,1,0.012377600371837615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,bfloat16,fp8,3,0.012305600196123123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,fp8,1,0.013420799374580383
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,bfloat16,3,0.012614400684833526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,bfloat16,7,0.012465599924325943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,fp8,3,0.013678400218486786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,fp8,7,0.013368000090122224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,bfloat16,15,0.012484800070524216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,fp8,15,0.013542400300502777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,bfloat16,31,0.012600000202655792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,fp8,31,0.013281600177288055
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,bfloat16,63,0.012777599692344665
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,fp8,63,0.01337919980287552
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,bfloat16,127,0.012699200212955475
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,fp8,127,0.01350879967212677
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,bfloat16,255,0.014547200500965118
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,fp8,255,0.015340800583362579
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,bfloat16,511,0.017452800273895265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,fp8,511,0.019023999571800232
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,bfloat16,1023,0.02051839977502823
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,bfloat16,3,0.012537600100040435
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,fp8,1023,0.021411199867725373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,bfloat16,2047,0.038446399569511416
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,fp8,2047,0.027859199047088622
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,fp8,4095,0.042320001125335696
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,bfloat16,4095,0.060335999727249144
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,bfloat16,8191,0.10420479774475097
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,fp8,8191,0.06812639832496643
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,fp8,16383,0.11831200122833252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,bfloat16,16383,0.19105119705200196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,bfloat16,1,0.012641599774360657
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,fp8,1,0.013684800267219544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,fp8,32767,0.21608319282531738
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,bfloat16,3,0.012591999769210816
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,bfloat16,bfloat16,32767,0.3651904106140137
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,fp8,7,0.013518400490283966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,bfloat16,7,0.012801599502563477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,fp8,15,0.013571199774742127
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,bfloat16,15,0.012775999307632447
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,bfloat16,31,0.01271360069513321
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,fp8,31,0.01356160044670105
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,fp8,63,0.013575999438762665
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,bfloat16,63,0.012668800354003907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,bfloat16,127,0.01287200003862381
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,fp8,127,0.013583999872207642
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,bfloat16,255,0.01470080018043518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,fp8,255,0.015521599352359772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,bfloat16,511,0.017985600233078002
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,fp8,511,0.019092799723148347
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,fp8,1023,0.02648319900035858
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,bfloat16,2047,0.05954239964485168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,fp8,2047,0.04158880114555359
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,bfloat16,bfloat16,16383,0.10497119426727294
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,bfloat16,4095,0.10242079496383667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,fp8,4095,0.06792160272598266
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,bfloat16,8191,0.18851360082626342
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,fp8,8191,0.11816639900207519
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,bfloat16,16383,0.36236960887908937
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,fp8,16383,0.21253280639648436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,bfloat16,1,0.011409600079059602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,fp8,1,0.011987199634313583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,bfloat16,3,0.011308799684047698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,fp8,3,0.012091200053691863
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,fp8,32767,0.04472000002861023
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,bfloat16,7,0.01143999993801117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,bfloat16,32767,0.7083663940429688
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,bfloat16,15,0.01143840029835701
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,fp8,7,0.012291199713945388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,fp8,32767,0.41173601150512695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,bfloat16,31,0.011324799805879592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,fp8,31,0.012316799908876418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,fp8,15,0.012425599992275238
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,fp8,63,0.012015999853610992
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,bfloat16,127,0.011572799831628799
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,bfloat16,63,0.011528000235557556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,bfloat16,255,0.012964800000190735
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,fp8,127,0.012438400089740754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,fp8,255,0.014353600144386292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,bfloat16,511,0.016139200329780577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,fp8,511,0.017888000607490538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,fp8,1023,0.016972799599170686
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,bfloat16,1023,0.015886400640010834
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,bfloat16,2047,0.01616320013999939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,fp8,2047,0.016828800737857818
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,bfloat16,4095,0.017030400037765504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,fp8,4095,0.017927999794483184
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,bfloat16,8191,0.01930239945650101
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,fp8,8191,0.01884640008211136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,bfloat16,16383,0.02281759977340698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,fp8,16383,0.023367999494075774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,bfloat16,32767,0.03965120017528534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,bfloat16,1,0.011507199704647064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,fp8,1,0.012435200065374375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,fp8,3,0.013592000305652618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,bfloat16,3,0.011665599793195725
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,fp8,3,0.012251199781894683
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,bfloat16,7,0.011580800265073776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,bfloat16,15,0.011670400202274323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,fp8,15,0.012361600250005721
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,fp8,31,0.012300799787044524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,bfloat16,63,0.011609599739313126
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,fp8,63,0.01228640004992485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,fp8,127,0.012275200337171555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,bfloat16,255,0.01318880021572113
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,fp8,255,0.01422400027513504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,bfloat16,bfloat16,1023,0.03676480054855347
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,bfloat16,511,0.016235199570655823
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,fp8,511,0.017980800569057466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,bfloat16,1023,0.015887999534606935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,fp8,1023,0.016910399496555328
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,fp8,2047,0.017267200350761413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,bfloat16,4095,0.018219199776649476
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,fp8,4095,0.0178399994969368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,bfloat16,8191,0.021536000072956085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,fp8,8191,0.021411199867725373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,bfloat16,16383,0.03870239853858948
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,fp8,16383,0.027939200401306152
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,bfloat16,32767,0.061153602600097653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,bfloat16,fp8,1023,0.01685439944267273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,fp8,32767,0.04415839910507202
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,fp8,1,0.012372799962759019
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,bfloat16,3,0.011638399958610535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,fp8,3,0.012329600006341934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,bfloat16,7,0.011670400202274323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,fp8,7,0.01234399974346161
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,bfloat16,15,0.011641599982976914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,fp8,15,0.012379200011491776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,bfloat16,31,0.011774399876594543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,fp8,31,0.012476799637079239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,bfloat16,63,0.011767999827861786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,fp8,63,0.012387199699878693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,bfloat16,127,0.011750400066375732
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,fp8,127,0.012388800084590913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,bfloat16,255,0.013595199584960938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,bfloat16,fp8,32767,0.028646400570869444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,fp8,255,0.014535999298095703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,bfloat16,511,0.016334399580955505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,fp8,511,0.017902399599552154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,fp8,1023,0.017027199268341064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,fp8,7,0.012399999797344208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,bfloat16,31,0.011710400134325028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,bfloat16,2047,0.017726400494575502
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,fp8,2047,0.01753759980201721
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,bfloat16,4095,0.021476800739765167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,fp8,4095,0.021104000508785248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,bfloat16,8191,0.03746879994869232
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,fp8,8191,0.0270687997341156
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,bfloat16,16383,0.059956800937652585
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,fp8,16383,0.042843198776245116
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,bfloat16,32767,0.10298880338668823
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,bfloat16,1,0.014688000082969666
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,fp8,32767,0.06750720143318176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,fp8,1,0.01574240028858185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,bfloat16,3,0.014697599411010741
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,fp8,3,0.015556800365447997
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,fp8,7,0.015516799688339234
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,bfloat16,15,0.014668799936771393
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,bfloat16,2047,0.016406400501728056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,fp8,15,0.015582400560379028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,bfloat16,31,0.014713600277900696
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,bfloat16,63,0.0146479994058609
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,fp8,31,0.015719999372959138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,fp8,63,0.01570879966020584
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,bfloat16,127,0.014644800126552582
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,fp8,127,0.015571199357509613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,bfloat16,255,0.016835199296474458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,fp8,255,0.0174127995967865
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,bfloat16,511,0.020287999510765077
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,fp8,511,0.021137599647045136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,bfloat16,1023,0.039043200016021726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,fp8,1023,0.028721600770950317
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,bfloat16,2047,0.060468798875808714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,fp8,2047,0.04439040124416351
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,bfloat16,4095,0.10460799932479858
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,fp8,4095,0.0694383978843689
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,bfloat16,8191,0.19145439863204955
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,fp8,8191,0.11913759708404541
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,bfloat16,1,0.011857599765062333
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,bfloat16,1,0.018318399786949158
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,fp8,1,0.020465600490570068
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,bfloat16,3,0.018464000523090364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,bfloat16,16383,0.36775200366973876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,bfloat16,7,0.018484799563884734
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,fp8,16383,0.22263998985290528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,bfloat16,15,0.01849599927663803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,fp8,7,0.020121599733829498
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,bfloat16,31,0.018411199748516082
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,fp8,15,0.020070399343967437
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,fp8,31,0.019952000677585603
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,bfloat16,63,0.01852000057697296
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,fp8,63,0.02009119987487793
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,bfloat16,127,0.018534399569034576
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,fp8,127,0.020015999674797058
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,bfloat16,255,0.022256000339984892
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,fp8,255,0.024028800427913666
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,bfloat16,511,0.040012800693511964
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,fp8,511,0.03238880038261414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,bfloat16,1023,0.06441919803619385
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,fp8,1023,0.04796479940414429
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,bfloat16,2047,0.10723199844360351
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,fp8,2047,0.07253440022468567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,bfloat16,4095,0.1917680025100708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,fp8,4095,0.12275680303573608
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,bfloat16,bfloat16,1023,0.01595200002193451
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,bfloat16,8191,0.3674815893173218
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,bfloat16,bfloat16,127,0.011763200163841248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,fp8,8191,0.22342560291290284
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,fp8,1,0.029131200909614564
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,bfloat16,1,0.026465600728988646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,bfloat16,3,0.026073598861694337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,fp8,3,0.02915999889373779
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,bfloat16,7,0.025979200005531312
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,fp8,7,0.028939199447631837
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,bfloat16,15,0.02629440128803253
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,fp8,15,0.028880000114440918
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,bfloat16,31,0.026155200600624085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,fp8,16383,0.42300000190734866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,fp8,31,0.02919520139694214
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,bfloat16,63,0.026009601354599
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,bfloat16,16383,0.7093359947204589
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,fp8,63,0.029123198986053467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,fp8,127,0.029151999950408937
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,bfloat16,127,0.027243199944496154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,bfloat16,255,0.04454559981822968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,bfloat16,511,0.06544960141181946
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,fp8,255,0.03778400123119354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,fp8,511,0.05394880175590515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,bfloat16,1023,0.1102336049079895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,fp8,1023,0.08063200116157532
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,bfloat16,2047,0.19506399631500243
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,fp8,2047,0.12970720529556273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,fp8,4095,0.227891206741333
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,bfloat16,4095,0.3663088083267212
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,bfloat16,1,0.02133280038833618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,fp8,1,0.022388799488544463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,bfloat16,3,0.020744000375270844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,bfloat16,fp8,3,0.020470400154590607
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,fp8,8191,0.4279263973236084
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,fp8,3,0.022308799624443054
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,bfloat16,7,0.02107200026512146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,bfloat16,15,0.020814399421215057
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,fp8,15,0.022686399519443512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,bfloat16,31,0.020895999670028687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,fp8,31,0.02265920042991638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,bfloat16,63,0.020880000293254854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,fp8,63,0.022767999768257143
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,fp8,127,0.02274399995803833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,fp8,16383,0.824398422241211
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,bfloat16,255,0.025540798902511597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,bfloat16,16383,1.3881967544555665
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,fp8,255,0.027590399980545043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,fp8,511,0.03428960144519806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,bfloat16,511,0.045694398880004886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,bfloat16,1023,0.06740800142288209
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,fp8,1023,0.050644797086715695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,fp8,2047,0.07591040134429931
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,bfloat16,2047,0.12488640546798706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,bfloat16,4095,0.1949455976486206
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,fp8,4095,0.12730720043182372
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,bfloat16,1,0.029177600145339967
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,fp8,1,0.031564798951148984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,bfloat16,bfloat16,7,0.015108799934387207
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,fp8,3,0.032374399900436404
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,bfloat16,3,0.028683200478553772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,fp8,7,0.03099200129508972
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,bfloat16,15,0.029126399755477907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,fp8,15,0.03190400004386902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,fp8,31,0.032180801033973694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,bfloat16,63,0.02877120077610016
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,fp8,63,0.03197599947452545
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,bfloat16,127,0.02932800054550171
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,fp8,127,0.03199679851531982
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,bfloat16,255,0.046275201439857486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,fp8,255,0.040489599108695984
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,bfloat16,511,0.0690064013004303
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,fp8,511,0.05683839917182922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,bfloat16,1023,0.11326559782028198
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,fp8,1023,0.08380799889564514
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,fp8,2047,0.13209439516067506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,bfloat16,bfloat16,8191,0.7066351890563964
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,fp8,7,0.022972799837589264
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,bfloat16,1,0.04334399998188019
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,bfloat16,4095,0.375596809387207
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,fp8,4095,0.23295679092407226
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,fp8,1,0.04920800030231476
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,bfloat16,3,0.04365920126438141
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,bfloat16,7,0.043377599120140074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,fp8,3,0.04862079918384552
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,bfloat16,15,0.04339680075645447
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,fp8,15,0.049000000953674315
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,bfloat16,31,0.04349119961261749
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,fp8,31,0.04902719855308533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,bfloat16,63,0.04403040111064911
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,fp8,63,0.04896160066127777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,bfloat16,127,0.05092480182647705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,fp8,127,0.04973919987678528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,bfloat16,255,0.07200000286102295
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,fp8,255,0.0667967975139618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,bfloat16,511,0.11696000099182129
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,fp8,511,0.09630720019340515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,fp8,1023,0.14711040258407593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,bfloat16,1023,0.20233280658721925
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,bfloat16,7,0.02831200063228607
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,bfloat16,31,0.027959999442100526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,fp8,2047,0.24274559020996095
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,bfloat16,2047,0.37142720222473147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,bfloat16,1,0.03432160019874573
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,fp8,1,0.03695200085639953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,bfloat16,3,0.03447999954223633
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,fp8,3,0.03688639998435974
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,bfloat16,7,0.03474240005016327
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,bfloat16,4095,0.7089536190032959
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,fp8,4095,0.44159998893737795
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,fp8,7,0.03685919940471649
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,fp8,31,0.03688960075378418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,fp8,15,0.037222400307655334
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,bfloat16,31,0.034062400460243225
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,bfloat16,bfloat16,2047,0.19736640453338622
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,bfloat16,127,0.03514719903469086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,fp8,63,0.03717280030250549
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,fp8,127,0.03762879967689514
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,bfloat16,255,0.05212479829788208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,fp8,255,0.0460783988237381
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,bfloat16,511,0.07410240173339844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,fp8,511,0.06318560242652893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,bfloat16,1023,0.11953920125961304
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,fp8,1023,0.08941760063171386
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,bfloat16,bfloat16,127,0.02088800072669983
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,bfloat16,2047,0.20323679447174073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,fp8,2047,0.1395359992980957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,bfloat16,1,0.04826720058917999
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,fp8,1,0.05510079860687256
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,bfloat16,3,0.04868319928646088
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,bfloat16,7,0.04918240010738373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,fp8,7,0.054073601961135864
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,fp8,15,0.054420799016952515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,bfloat16,fp8,7,0.049502399563789365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,bfloat16,31,0.04937280118465424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,fp8,31,0.05411199927330017
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,bfloat16,63,0.04900639951229095
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,fp8,63,0.05526080131530762
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,bfloat16,127,0.0578544020652771
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,fp8,127,0.05512639880180359
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,fp8,255,0.07321439981460572
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,bfloat16,255,0.08027679920196533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,bfloat16,511,0.12364000082015991
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,fp8,511,0.10173120498657226
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,fp8,1023,0.15359359979629517
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,bfloat16,2047,0.3757936000823975
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,fp8,2047,0.24879040718078613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,bfloat16,1,0.07800800204277039
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,fp8,1,0.08923199772834778
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,bfloat16,3,0.07796319723129272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,fp8,3,0.08928480148315429
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,bfloat16,7,0.07803679704666137
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,fp8,7,0.08919039964675904
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,bfloat16,15,0.07808640003204345
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,bfloat16,31,0.07779679894447326
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,fp8,15,0.0893664002418518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,fp8,31,0.09080160260200501
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,bfloat16,63,0.08127679824829101
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,bfloat16,15,0.03440000116825104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,fp8,63,0.09313120245933533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,bfloat16,127,0.08599200248718261
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,fp8,127,0.09698399901390076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,bfloat16,255,0.1266208052635193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,fp8,255,0.1229856014251709
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,bfloat16,bfloat16,63,0.033846399188041686
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,fp8,511,0.18409600257873535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,bfloat16,511,0.21642560958862306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,fp8,1023,0.2818079948425293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,bfloat16,1,0.01185920014977455
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,fp8,1,0.012644800543785095
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,fp8,3,0.054073601961135864
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,bfloat16,3,0.01186240017414093
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,bfloat16,7,0.01186399981379509
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,fp8,3,0.012444800138473511
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,fp8,7,0.012681600451469422
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,fp8,2047,0.4757279872894287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,fp8,15,0.01268479973077774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,bfloat16,2047,0.7181424140930176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,fp8,31,0.012622399628162384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,bfloat16,31,0.011760000139474869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,fp8,63,0.012617599964141846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,bfloat16,127,0.011764799803495407
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,bfloat16,63,0.01159520000219345
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,fp8,127,0.012740799784660339
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,bfloat16,255,0.013492800295352936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,bfloat16,1023,0.2101072072982788
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,bfloat16,511,0.01653439998626709
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,fp8,255,0.01406719982624054
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,fp8,511,0.01812160015106201
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,fp8,1023,0.017319999635219574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,bfloat16,2047,0.01619199961423874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,fp8,2047,0.017398400604724883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,bfloat16,4095,0.018371200561523436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,fp8,4095,0.017740799486637114
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,bfloat16,8191,0.022195200622081756
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,fp8,8191,0.02222079932689667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,bfloat16,16383,0.038431999087333676
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,fp8,16383,0.028563201427459717
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,bfloat16,32767,0.06212639808654785
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,bfloat16,1,0.012025599926710128
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,fp8,32767,0.04426079988479614
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,fp8,1,0.012620800733566284
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,fp8,3,0.012361600250005721
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,bfloat16,7,0.011948800086975098
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,fp8,7,0.012342400103807449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,bfloat16,15,0.011956799775362015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,fp8,15,0.012368000298738479
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,bfloat16,31,0.011974400281906128
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,fp8,31,0.012347199767827988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,bfloat16,63,0.012004800140857697
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,fp8,63,0.012238399684429168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,bfloat16,127,0.011982399970293045
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,fp8,127,0.012404800206422806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,bfloat16,255,0.013537600636482239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,fp8,255,0.014256000518798828
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,bfloat16,511,0.01653439998626709
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,fp8,511,0.01793919950723648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,bfloat16,1023,0.016139200329780577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,fp8,1023,0.017155200242996216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,bfloat16,2047,0.017918400466442108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,fp8,2047,0.017321600019931792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,bfloat16,4095,0.022228799760341644
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,fp8,4095,0.02083680033683777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,bfloat16,8191,0.0378495991230011
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,fp8,8191,0.026903998851776124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,bfloat16,16383,0.059673601388931276
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,fp8,16383,0.04192320108413696
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,fp8,32767,0.0672432005405426
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,bfloat16,1,0.011692799627780914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,fp8,1,0.012494400143623352
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,bfloat16,3,0.011736000329256058
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,fp8,3,0.012513600289821625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,bfloat16,7,0.011939200013875962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,fp8,7,0.012518399953842163
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,bfloat16,bfloat16,15,0.04906400144100189
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,bfloat16,15,0.011929599940776825
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,fp8,15,0.012583999335765839
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,fp8,31,0.012582400441169738
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,bfloat16,63,0.011795199662446975
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,bfloat16,15,0.011740799993276596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,fp8,63,0.012622399628162384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,bfloat16,127,0.011992000043392181
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,bfloat16,255,0.01356000006198883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,fp8,255,0.01440960019826889
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,bfloat16,511,0.016646400094032288
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,fp8,511,0.018137599527835845
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,bfloat16,1023,0.017627200484275816
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,fp8,1023,0.0173567995429039
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,bfloat16,2047,0.02044160068035126
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,bfloat16,bfloat16,1023,0.016072000563144683
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,fp8,2047,0.021035200357437132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,bfloat16,4095,0.03749600052833557
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,bfloat16,8191,0.059956800937652585
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,fp8,8191,0.04301120042800903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,bfloat16,3,0.012078399956226348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,bfloat16,16383,0.10272639989852905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,fp8,16383,0.06713280081748962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,fp8,32767,0.11769599914550781
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,bfloat16,32767,0.18942400217056274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,bfloat16,1,0.05477439761161804
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,fp8,1,0.06064479947090149
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,bfloat16,3,0.05478399991989136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,fp8,3,0.06071360111236572
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,bfloat16,7,0.05468000173568725
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,fp8,7,0.0607375979423523
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,bfloat16,15,0.05470560193061828
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,fp8,15,0.06064000129699707
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,fp8,31,0.060924798250198364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,bfloat16,63,0.056561601161956784
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,bfloat16,bfloat16,32767,0.10374399423599243
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,fp8,63,0.06080160140991211
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,bfloat16,bfloat16,1023,0.37670559883117677
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,bfloat16,127,0.06298400163650512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,fp8,127,0.06366559863090515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,fp8,255,0.07792959809303283
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,bfloat16,255,0.08280479907989502
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,bfloat16,511,0.12670079469680787
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,fp8,511,0.10932159423828125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,bfloat16,31,0.012252800166606903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,bfloat16,1023,0.21370561122894288
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,fp8,1023,0.16210880279541015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,fp8,1,0.09552000164985656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,bfloat16,3,0.08315039873123169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,bfloat16,7,0.08389599919319153
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,fp8,3,0.09485599994659424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,fp8,7,0.09470239877700806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,bfloat16,15,0.0841488003730774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,fp8,15,0.09478399753570557
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,bfloat16,31,0.08325600028038024
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,fp8,31,0.0956272006034851
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,bfloat16,63,0.08780800104141236
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,fp8,63,0.09764320254325867
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,fp8,4095,0.02674719989299774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,bfloat16,127,0.09523040056228638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,fp8,127,0.103438401222229
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,bfloat16,255,0.13685760498046876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,fp8,255,0.1279327988624573
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,bfloat16,511,0.21708641052246094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,bfloat16,1,0.13872640132904052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,fp8,1023,0.28879361152648925
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,bfloat16,1023,0.38335840702056884
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,fp8,1,0.1623792052268982
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,bfloat16,3,0.13801440000534057
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,bfloat16,7,0.1386911988258362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,fp8,3,0.16208479404449463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,fp8,7,0.16182559728622437
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,bfloat16,15,0.13852319717407227
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,fp8,15,0.16248960494995118
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,bfloat16,bfloat16,31,0.05488640069961548
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,bfloat16,31,0.141211199760437
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,fp8,31,0.1630784034729004
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,bfloat16,63,0.14453599452972413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,fp8,63,0.17167199850082399
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,fp8,127,0.1755728006362915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,bfloat16,255,0.2236639976501465
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,fp8,255,0.22767200469970703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,bfloat16,511,0.40302557945251466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,fp8,511,0.3510207891464233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,bfloat16,1,0.08267840147018432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,bfloat16,fp8,127,0.013121600449085235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,bfloat16,1,0.09470880031585693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,bfloat16,3,0.09409279823303222
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,fp8,1,0.10713119506835937
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,fp8,3,0.10612640380859376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,bfloat16,7,0.0949392020702362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,fp8,7,0.10569759607315063
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,bfloat16,15,0.09511359930038452
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,fp8,1023,0.5470831871032715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,fp8,15,0.10607680082321166
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,bfloat16,1023,0.7248847961425782
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,bfloat16,31,0.09492639899253845
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,fp8,31,0.1062000036239624
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,bfloat16,63,0.10230400562286376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,bfloat16,127,0.10651839971542358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,fp8,63,0.11046240329742432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,bfloat16,255,0.14699360132217407
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,fp8,255,0.13984800577163697
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,fp8,127,0.1159440040588379
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,bfloat16,1,0.14864959716796874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,bfloat16,511,0.2219248056411743
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,bfloat16,3,0.14791040420532225
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,bfloat16,fp8,511,0.190339195728302
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,fp8,1,0.1733728051185608
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,bfloat16,fp8,511,0.2015631914138794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,fp8,3,0.17142720222473146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,bfloat16,7,0.14839199781417847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,fp8,7,0.17279360294342042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,bfloat16,15,0.14783519506454468
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,bfloat16,31,0.15399520397186278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,fp8,15,0.17197760343551635
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,fp8,31,0.173471999168396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,fp8,63,0.18431040048599243
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,bfloat16,127,0.1651520013809204
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,fp8,127,0.18787519931793212
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,bfloat16,255,0.24214398860931396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,fp8,255,0.23698079586029053
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,bfloat16,1,0.26270720958709715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,bfloat16,511,0.40722880363464353
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,fp8,511,0.35659360885620117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,bfloat16,3,0.25990560054779055
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,fp8,1,0.3065376043319702
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,fp8,3,0.3095439910888672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,bfloat16,bfloat16,127,0.15072319507598878
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,bfloat16,15,0.2584287881851196
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,bfloat16,7,0.261897611618042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,fp8,15,0.30732159614562987
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,fp8,7,0.30443360805511477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,bfloat16,31,0.2693808078765869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,bfloat16,63,0.267411208152771
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,fp8,31,0.3179136037826538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,fp8,63,0.3291152000427246
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,bfloat16,127,0.28149440288543703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,fp8,127,0.3277120113372803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,bfloat16,1,0.012041600048542022
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,fp8,1,0.012683199346065521
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,bfloat16,3,0.01215519979596138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,fp8,3,0.01276639997959137
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,bfloat16,7,0.011849600076675414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,bfloat16,255,0.4200399875640869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,fp8,7,0.01273919939994812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,bfloat16,15,0.01194079965353012
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,fp8,255,0.42868800163269044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,fp8,31,0.012595200538635254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,fp8,15,0.01300320029258728
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,fp8,63,0.012559999525547028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,bfloat16,127,0.012127999961376191
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,bfloat16,63,0.01183520033955574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,fp8,127,0.01279039978981018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,bfloat16,255,0.013777600228786468
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,fp8,255,0.014505599439144135
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,bfloat16,511,0.01650719940662384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,bfloat16,511,0.7787231922149658
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,fp8,511,0.01845120042562485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,bfloat16,fp8,511,0.6745471954345703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,bfloat16,1023,0.015897600352764128
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,fp8,1023,0.017407999932765962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,fp8,2047,0.01743839979171753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,bfloat16,2047,0.017798399925231932
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,bfloat16,8191,0.03874239921569824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,bfloat16,4095,0.021782399713993074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,fp8,16383,0.04349919855594635
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,bfloat16,16383,0.05971840023994446
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,bfloat16,32767,0.10436799526214599
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,bfloat16,1,0.011935999989509583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,fp8,32767,0.0703216016292572
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,fp8,1,0.012943999469280243
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,bfloat16,3,0.011972799897193909
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,fp8,3,0.012782399356365205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,fp8,7,0.012768000364303589
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,bfloat16,15,0.01188960000872612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,fp8,15,0.012761600315570831
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,bfloat16,bfloat16,63,0.1571887969970703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,bfloat16,31,0.01191679984331131
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,fp8,31,0.012748800218105316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,fp8,63,0.012768000364303589
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,bfloat16,63,0.012111999839544297
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,bfloat16,127,0.012142399698495865
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,fp8,127,0.0130048006772995
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,bfloat16,255,0.01406719982624054
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,bfloat16,511,0.016995200514793397
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,fp8,511,0.018449600040912627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,bfloat16,1023,0.01788160055875778
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,fp8,1023,0.017392000555992125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,bfloat16,2047,0.019721600413322448
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,fp8,2047,0.021062399446964263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,bfloat16,4095,0.03790079951286316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,fp8,4095,0.02704479992389679
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,fp8,8191,0.04202559888362885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,bfloat16,16383,0.10454080104827881
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,fp8,16383,0.06707360148429871
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,bfloat16,32767,0.1914687991142273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,fp8,32767,0.11826080083847046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,bfloat16,1,0.012185599654912949
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,bfloat16,31,0.01204639971256256
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,bfloat16,3,0.01207519993185997
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,fp8,3,0.012881599366664886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,bfloat16,7,0.012039999663829803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,fp8,7,0.012736000120639801
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,bfloat16,15,0.01220960021018982
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,fp8,15,0.013115200400352477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,bfloat16,31,0.012163200229406358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,fp8,4095,0.021209600567817687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,fp8,31,0.013153600692749023
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,bfloat16,63,0.012108799815177918
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,bfloat16,fp8,8191,0.02709920108318329
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,bfloat16,127,0.012476799637079239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,fp8,63,0.01279039978981018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,fp8,127,0.013012799620628356
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,fp8,255,0.014633600413799287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,bfloat16,511,0.017166399955749513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,fp8,511,0.018203200399875642
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,bfloat16,1023,0.02001120001077652
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,fp8,1023,0.020454399287700653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,bfloat16,7,0.012100800126791
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,bfloat16,2047,0.03756000101566315
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,fp8,2047,0.02664479911327362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,bfloat16,4095,0.05960320234298706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,fp8,4095,0.04192320108413696
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,bfloat16,8191,0.10305919647216796
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,fp8,8191,0.06701599955558776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,fp8,255,0.014667199552059173
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,bfloat16,16383,0.18919839859008789
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,fp8,16383,0.11590080261230469
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,1,0.013388800621032714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,1,0.01393119990825653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,3,0.013497599959373474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,3,0.0139055997133255
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,fp8,32767,0.21223840713500977
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,bfloat16,32767,0.3628927946090698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,7,0.013406400382518769
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,7,0.01383039951324463
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,31,0.01345600038766861
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,15,0.013262400031089782
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,15,0.014473600685596466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,63,0.013935999572277069
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,63,0.014262400567531586
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,127,0.013804799318313599
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,127,0.014574399590492249
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,255,0.015321600437164306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,255,0.01637440025806427
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,511,0.01857600063085556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,511,0.019755199551582336
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,1023,0.02252960056066513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,1023,0.02364159971475601
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,2047,0.04040000140666962
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,2047,0.02908639907836914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,4095,0.062636798620224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,4095,0.04426240026950836
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,8191,0.10706559419631959
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,8191,0.07032319903373718
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,16383,0.19701119661331176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,16383,0.12411680221557617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,fp8,1,0.013014400005340576
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,32767,0.37254559993743896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,32767,0.22408480644226075
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,1,0.013393600285053254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,1,0.014313599467277527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,3,0.013769599795341491
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,3,0.014326399564743042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,7,0.01385599970817566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,7,0.014294399321079254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,15,0.013952000439167023
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,15,0.014302399754524232
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,31,0.013798399269580841
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,65535,0.42259678840637205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,31,0.014321599900722504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,63,0.013900800049304963
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,bfloat16,bfloat16,255,0.014281600713729858
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,127,0.013924799859523773
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,127,0.014633600413799287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,63,0.014027200639247894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,255,0.01648640036582947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,511,0.018995200097560883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,511,0.019619199633598327
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,1023,0.03769760131835938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,1023,0.027535998821258546
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,2047,0.05933600068092346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,4095,0.1034432053565979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,4095,0.06692960262298583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,8191,0.190065598487854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,8191,0.11707680225372315
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,fp8,31,0.013631999492645264
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,16383,0.21587839126586914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,16383,0.36337599754333494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,32767,0.4145631790161133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,32767,0.7103631973266602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,1,0.01135680004954338
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,1,0.012091200053691863
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,3,0.011388800293207168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,3,0.012148799747228623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,7,0.011435200273990632
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,7,0.01228640004992485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,15,0.011472000181674958
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,15,0.01226079985499382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,31,0.0114656001329422
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,31,0.012263999879360199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,63,0.011468800157308579
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,63,0.012272000312805176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,127,0.011448000371456147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,127,0.01215839982032776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,65535,1.405239963531494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,255,0.012960000336170197
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,bfloat16,bfloat16,65535,0.7192463874816895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,255,0.014113600552082061
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,65535,0.8130880355834961
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,bfloat16,255,0.015449599921703338
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,511,0.016113600134849547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,1023,0.016790400445461272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,2047,0.016676799952983858
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,1023,0.017632000148296356
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,4095,0.01815039962530136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,4095,0.017243200540542604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,8191,0.01845120042562485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,bfloat16,fp8,2047,0.04198879897594452
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,16383,0.022014400362968443
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,16383,0.022526399791240694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,32767,0.023971199989318848
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,32767,0.024352000653743745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,bfloat16,65535,0.02754240036010742
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,65535,0.02784160077571869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,1,0.011479999870061874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,1,0.012120000272989272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,3,0.012296000123023986
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,7,0.01138240024447441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,7,0.012238399684429168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,15,0.011508800089359283
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,15,0.012328000366687774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,31,0.011452800035476685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,31,0.012432000041007996
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,63,0.011460799723863602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,63,0.012439999729394913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,127,0.011459200084209442
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,127,0.012297599762678146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,bfloat16,bfloat16,8191,0.05955680012702942
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,255,0.013017599284648896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,255,0.014244799315929414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,511,0.018104000389575957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,511,0.01624799966812134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,1023,0.01679680049419403
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,1023,0.017735999822616578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,2047,0.016790400445461272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,2047,0.01810079962015152
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,4095,0.017033599317073822
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,4095,0.018089599907398224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,8191,0.018987199664115904
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,8191,0.019937600195407867
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,16383,0.02067359983921051
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,16383,0.020632000267505647
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,32767,0.02457599937915802
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,32767,0.024967999756336214
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,65535,0.04107039868831634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,fp8,65535,0.03244799971580505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,1,0.011494400352239609
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,1,0.01231039986014366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,3,0.011497599631547928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,7,0.011593600362539291
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,7,0.012347199767827988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,15,0.011619199812412263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,15,0.012359999865293504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,31,0.011636800318956374
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,31,0.012206400185823441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,63,0.011630400270223617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,63,0.012230399996042252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,127,0.011660800129175187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,127,0.012246400117874146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,255,0.0133200004696846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,255,0.014230400323867798
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,511,0.01629440039396286
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,511,0.018225599825382233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,1023,0.016897599399089813
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,1023,0.017817600071430205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,2047,0.01685439944267273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,2047,0.01794240027666092
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,4095,0.017054399847984313
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,4095,0.01812320053577423
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,8191,0.019043199717998505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,8191,0.019644799828529357
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,16383,0.023574399948120116
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,16383,0.022966399788856506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,32767,0.02727999985218048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,511,0.018063999712467194
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,32767,0.026662400364875792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,bfloat16,65535,0.043059200048446655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,1,0.011532799899578094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,65535,0.032779198884963986
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,3,0.011420799791812897
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,1,0.012359999865293504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,2047,0.01791519969701767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,3,0.012039999663829803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,7,0.011432000249624253
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,7,0.012427199631929398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,15,0.01212640032172203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,bfloat16,fp8,8191,0.02003840059041977
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,31,0.012510399520397186
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,63,0.0115167997777462
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,127,0.011671999841928482
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,63,0.012254399806261062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,127,0.01223199963569641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,255,0.013182400166988373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,255,0.014262400567531586
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,511,0.01605760008096695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,511,0.018217599391937254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,1023,0.016686399281024934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,1023,0.017798399925231932
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,bfloat16,bfloat16,3,0.011598400026559829
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,2047,0.016846400499343873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,2047,0.018016000092029572
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,4095,0.017375999689102174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,4095,0.01820639967918396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,8191,0.02027679979801178
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,8191,0.020216000080108643
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,16383,0.02428479939699173
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,16383,0.023966400325298308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,32767,0.039883199334144595
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,32767,0.029897600412368774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,65535,0.06289280056953431
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,1,0.01255040019750595
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,1,0.013470399379730224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,fp8,65535,0.046203199028968814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,3,0.012937599420547485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,3,0.01337919980287552
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,7,0.012544000148773193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,15,0.012928000092506409
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,15,0.013300800323486328
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,31,0.012611199915409089
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,31,0.013619199395179749
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,63,0.012680000066757202
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,63,0.013476799428462981
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,127,0.01276959925889969
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,127,0.013348799943923951
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,255,0.014268800616264343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,255,0.015478399395942689
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,511,0.01743839979171753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,511,0.018787199258804323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,1023,0.019433599710464478
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,1023,0.019233599305152893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,2047,0.022392000257968902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,2047,0.023028799891471864
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,4095,0.03951840102672577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,bfloat16,fp8,3,0.01218239963054657
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,4095,0.028832000494003297
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,8191,0.06286879777908325
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,8191,0.04598079919815064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,16383,0.07356799840927124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,16383,0.10906720161437988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,32767,0.1966480016708374
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,32767,0.12695839405059814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,1,0.013566400110721587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,3,0.01265760064125061
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,65535,0.22570400238037108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,3,0.013551999628543854
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,bfloat16,65535,0.37023200988769533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,7,0.013545599579811097
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,7,0.012822400033473968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,15,0.013550400733947754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,31,0.012889599800109864
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,15,0.012615999579429627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,31,0.013532799482345582
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,63,0.012904000282287598
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,63,0.01348000019788742
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,127,0.012705600261688233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,127,0.013582399487495423
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,255,0.01436000019311905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,255,0.015401600301265717
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,511,0.017550399899482726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,511,0.01889919936656952
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,1023,0.021529600024223328
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,1023,0.02264000028371811
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,2047,0.03952159881591797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,2047,0.02871679961681366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,4095,0.0607807993888855
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,4095,0.04447999894618988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,15,0.011742399632930755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,8191,0.10676159858703613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,8191,0.0704367995262146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,16383,0.1936400055885315
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,16383,0.12121280431747436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,32767,0.22056479454040528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,32767,0.36668479442596436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,1,0.011631999909877778
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,1,0.012251199781894683
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,3,0.011655999720096588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,3,0.012272000312805176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,7,0.011748799681663513
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,7,0.012246400117874146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,15,0.01173119992017746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,fp8,65535,0.4196591854095459
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,15,0.012241599708795547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,31,0.011726400256156922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,63,0.011590400338172912
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,63,0.012316799908876418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,65535,0.7143072128295899
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,127,0.011526399850845337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,127,0.012427199631929398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,255,0.014127999544143677
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,511,0.016249600052833556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,255,0.0133200004696846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,511,0.018025599420070648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,1023,0.016675199568271636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,2047,0.01671999990940094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,1023,0.018057599663734436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,2047,0.01796479970216751
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,4095,0.017008000612258913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,8191,0.02041279971599579
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,8191,0.019841599464416503
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,16383,0.02651199996471405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,16383,0.02656959891319275
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,32767,0.04299359917640686
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,32767,0.03347519934177399
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,bfloat16,bfloat16,1,0.012913599610328674
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,bfloat16,65535,0.06637759804725647
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,65535,0.049486398696899414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,1,0.011446399986743927
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,1,0.012299200147390365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,3,0.011523199826478958
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,3,0.012319999933242797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,7,0.011553599685430526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,7,0.012281599640846252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,15,0.011608000099658965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,15,0.01225920021533966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,31,0.011591999977827071
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,31,0.012241599708795547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,63,0.011644800007343293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,63,0.012368000298738479
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,127,0.011611200124025344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,127,0.012300799787044524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,255,0.013179199397563934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,255,0.014416000247001648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,511,0.01606079936027527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,1023,0.016811199486255646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,bfloat16,bfloat16,31,0.011420799791812897
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,2047,0.017115199565887453
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,2047,0.017987200617790224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,4095,0.018900799751281738
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,4095,0.018308800458908082
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,8191,0.023817600309848787
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,8191,0.023395200073719025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,16383,0.04022560119628906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,16383,0.0298335999250412
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,32767,0.0629472017288208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,32767,0.04679999947547912
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,bfloat16,65535,0.10739680528640747
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,65535,0.07393919825553893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,bfloat16,1,0.014739200472831726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,fp8,1,0.015612800419330598
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,bfloat16,3,0.01475519984960556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,fp8,3,0.0154448002576828
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,bfloat16,7,0.014678399264812469
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,fp8,7,0.015507200360298156
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,bfloat16,15,0.014779199659824372
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,31,0.012582400441169738
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,bfloat16,31,0.014769600331783294
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,fp8,31,0.015995199978351592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,bfloat16,63,0.014667199552059173
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,fp8,63,0.016016000509262086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,fp8,127,0.016019199788570405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,bfloat16,255,0.017047999799251555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,bfloat16,fp8,4095,0.018334400653839112
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,fp8,255,0.018214400112628936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,bfloat16,511,0.02088160067796707
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,fp8,511,0.021668800711631776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,bfloat16,1023,0.038940799236297605
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,fp8,1023,0.0293071985244751
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,bfloat16,2047,0.0611840009689331
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,fp8,2047,0.04285120069980621
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,fp8,4095,0.06827840209007263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,bfloat16,8191,0.19218720197677613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,fp8,8191,0.11864160299301148
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,511,0.017694400250911714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,bfloat16,fp8,1023,0.017796799540519714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,bfloat16,16383,0.3645119905471802
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,fp8,16383,0.21922240257263184
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,bfloat16,1,0.018248000741004945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,fp8,1,0.019707199931144715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,bfloat16,3,0.018198400735855103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,fp8,32767,0.4167344093322754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,fp8,3,0.019793599843978882
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,bfloat16,32767,0.7120719909667969
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,fp8,7,0.02003519982099533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,bfloat16,7,0.018408000469207764
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,fp8,15,0.019811199605464937
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,bfloat16,15,0.018299199640750885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,fp8,31,0.019791999459266664
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,fp8,15,0.015484799444675446
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,bfloat16,63,0.018310399353504182
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,fp8,63,0.01979999989271164
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,bfloat16,127,0.018449600040912627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,fp8,127,0.019888000190258028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,fp8,255,0.02366719990968704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,bfloat16,127,0.014731200039386749
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,bfloat16,511,0.0399071991443634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,fp8,511,0.032092800736427306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,bfloat16,1023,0.0631983995437622
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,fp8,1023,0.047414401173591615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,fp8,2047,0.07243520021438599
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,bfloat16,2047,0.10676800012588501
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,bfloat16,4095,0.19264960289001465
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,fp8,4095,0.12086880207061768
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,fp8,8191,0.22466559410095216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,bfloat16,8191,0.3664463996887207
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,bfloat16,bfloat16,4095,0.10472960472106933
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,bfloat16,1,0.02125599980354309
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,fp8,1,0.02293439954519272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,fp8,16383,0.4181312084197998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,bfloat16,16383,0.709339189529419
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,fp8,3,0.022224000096321105
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,bfloat16,7,0.0208639994263649
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,fp8,7,0.02234400063753128
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,bfloat16,15,0.020662400126457214
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,fp8,15,0.022358399629592896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,fp8,32767,0.8278271675109863
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,bfloat16,31,0.020798400044441223
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,bfloat16,63,0.020763200521469117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,fp8,31,0.022572800517082214
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,fp8,63,0.02223999947309494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,bfloat16,32767,1.3965807914733888
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,bfloat16,127,0.020630399882793426
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,fp8,127,0.022259199619293214
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,fp8,255,0.025971201062202454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,bfloat16,511,0.042126399278640744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,bfloat16,255,0.024408000707626342
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,fp8,511,0.03391999900341034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,bfloat16,fp8,7,0.013350400328636169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,bfloat16,1023,0.06627839803695679
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,bfloat16,31,0.018456000089645385
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,bfloat16,2047,0.11002240180969239
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,fp8,2047,0.07461599707603454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,bfloat16,bfloat16,255,0.022086399793624877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,fp8,4095,0.12583359479904174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,bfloat16,1,0.028331199288368226
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,fp8,1,0.031918400526046754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,bfloat16,3,0.02831520140171051
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,fp8,8191,0.22543199062347413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,fp8,3,0.03208799958229065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,bfloat16,8191,0.36767361164093015
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,bfloat16,7,0.028201600909233092
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,fp8,15,0.030766400694847106
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,bfloat16,31,0.02818560004234314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,fp8,31,0.030769601464271545
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,bfloat16,63,0.02832320034503937
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,fp8,63,0.030846399068832398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,bfloat16,127,0.03031040132045746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,fp8,127,0.030924800038337707
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,bfloat16,255,0.04567359983921051
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,fp8,255,0.03936800062656402
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,bfloat16,511,0.06701920032501221
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,fp8,511,0.05576320290565491
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,bfloat16,3,0.021291199326515197
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,bfloat16,1023,0.11245919466018676
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,fp8,1023,0.08127520084381104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,bfloat16,2047,0.19667520523071289
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,bfloat16,4095,0.3714479923248291
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,fp8,4095,0.23042399883270265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,fp8,8191,0.4307231903076172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,bfloat16,8191,0.7136559963226319
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,bfloat16,1,0.034041601419448855
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,fp8,1,0.03736959993839264
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,fp8,1023,0.05397120118141174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,fp8,3,0.03760800063610077
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,bfloat16,7,0.033932799100875856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,bfloat16,bfloat16,4095,0.19695520401000977
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,fp8,7,0.03777759969234466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,fp8,15,0.03680799901485443
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,bfloat16,31,0.03471519947052002
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,fp8,31,0.03684319853782654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,bfloat16,63,0.03458400070667267
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,bfloat16,15,0.028777599334716797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,fp8,63,0.036883199214935304
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,bfloat16,127,0.03495840132236481
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,fp8,7,0.030958399176597595
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,bfloat16,255,0.05189120173454285
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,fp8,127,0.037662398815155027
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,fp8,255,0.04621759951114655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,bfloat16,511,0.07352319955825806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,fp8,511,0.06151840090751648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,bfloat16,1023,0.11990400552749633
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,bfloat16,2047,0.20315680503845215
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,fp8,2047,0.13570239543914794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,bfloat16,1,0.0483951985836029
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,fp8,1,0.053544002771377566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,fp8,4095,0.23902559280395508
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,bfloat16,3,0.04806720018386841
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,fp8,3,0.053615999221801755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,bfloat16,4095,0.37281599044799807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,fp8,7,0.05381600260734558
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,bfloat16,15,0.04805760085582733
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,bfloat16,7,0.04922240078449249
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,bfloat16,fp8,2047,0.12863680124282836
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,bfloat16,31,0.04812319874763489
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,fp8,31,0.053767997026443484
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,fp8,15,0.05400639772415161
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,bfloat16,63,0.04972000122070312
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,fp8,63,0.053990399837493895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,bfloat16,127,0.05633760094642639
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,bfloat16,255,0.08036479949951172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,fp8,255,0.0728879988193512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,bfloat16,511,0.12148640155792237
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,fp8,511,0.10136640071868896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,bfloat16,1023,0.20922720432281494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,fp8,1023,0.15582720041275025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,bfloat16,2047,0.3753344058990479
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,fp8,2047,0.24663839340209961
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,1,0.012600000202655792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,1,0.011699199676513672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,3,0.011766400188207626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,3,0.012460800260305405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,fp8,4095,0.4521984100341797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,7,0.011776000261306763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,bfloat16,3,0.033983999490737916
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,15,0.011736000329256058
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,bfloat16,4095,0.7254303932189942
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,7,0.01271039992570877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,31,0.011664000153541566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,63,0.011972799897193909
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,63,0.012412799894809723
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,bfloat16,15,0.033955198526382444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,31,0.012606400251388549
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,127,0.012041600048542022
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,127,0.01250080019235611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,255,0.013499200344085693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,511,0.016657599806785585
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,511,0.018488000333309173
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,255,0.014534400403499603
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,1023,0.01706079989671707
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,1023,0.01836480051279068
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,2047,0.01833920031785965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,4095,0.01908160001039505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,4095,0.01881600022315979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,8191,0.02446720004081726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,8191,0.02417919933795929
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,16383,0.042843198776245116
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,16383,0.03264800012111664
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,32767,0.06599360108375549
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,32767,0.04946880042552948
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,65535,0.11030880212783814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,bfloat16,fp8,1023,0.08904640078544616
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,1,0.012206400185823441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,65535,0.0778335988521576
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,1,0.01265919953584671
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,3,0.01290079951286316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,7,0.011769600212574005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,7,0.012641599774360657
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,15,0.012222400307655335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,15,0.012600000202655792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,31,0.01202400028705597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,31,0.012929600477218629
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,63,0.01185920014977455
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,63,0.01268800050020218
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,127,0.012027200311422348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,127,0.012715199589729309
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,255,0.013756799697875976
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,255,0.014839999377727509
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,511,0.016569599509239197
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,511,0.018262399733066557
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,1023,0.017294399440288544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,1023,0.018291200697422027
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,2047,0.01897760033607483
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,2047,0.018787199258804323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,4095,0.022752000391483305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,4095,0.022060799598693847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,8191,0.0401775985956192
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,16383,0.06152960062026978
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,bfloat16,fp8,127,0.05566080212593079
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,16383,0.04512960016727448
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,32767,0.10615040063858032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,65535,0.1926751971244812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,65535,0.11972320079803467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,bfloat16,1,0.05520480275154114
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,fp8,1,0.060046398639678956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,bfloat16,3,0.055343997478485105
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,fp8,3,0.059915202856063846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,bfloat16,7,0.05532479882240295
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,fp8,7,0.05992159843444824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,bfloat16,15,0.055236798524856565
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,fp8,15,0.06010559797286987
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,bfloat16,31,0.05529119968414307
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,fp8,31,0.059952002763748166
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,bfloat16,63,0.056707197427749635
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,fp8,63,0.06007999777793884
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,fp8,15,0.012481600046157837
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,fp8,127,0.06101920008659363
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,bfloat16,255,0.08123360276222229
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,fp8,255,0.0775488018989563
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,bfloat16,511,0.1252351999282837
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,fp8,511,0.10841280221939087
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,bfloat16,1023,0.21378400325775146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,fp8,1023,0.15898720026016236
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,bfloat16,3,0.011564800143241882
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,fp8,2047,0.2527904033660889
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,bfloat16,1,0.08324959874153137
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,bfloat16,2047,0.38083999156951903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,bfloat16,3,0.08307679891586303
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,fp8,1,0.09338560104370117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,bfloat16,7,0.08327999711036682
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,fp8,7,0.09325119853019714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,bfloat16,15,0.08312000036239624
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,8191,0.030320000648498536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,fp8,15,0.09342880249023437
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,bfloat16,31,0.08276000022888183
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,bfloat16,63,0.08757280111312866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,fp8,31,0.09489279985427856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,fp8,63,0.09640480279922485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,bfloat16,fp8,32767,0.06966879963874817
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,bfloat16,127,0.09340800046920776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,fp8,127,0.10283039808273316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,bfloat16,255,0.13661760091781616
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,fp8,255,0.1280287981033325
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,bfloat16,511,0.21568000316619873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,fp8,511,0.19031039476394654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,fp8,1023,0.28776159286499026
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,bfloat16,1023,0.38131520748138426
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,bfloat16,1,0.09459679722785949
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,fp8,1,0.1052240014076233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,bfloat16,3,0.09358879923820496
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,bfloat16,bfloat16,127,0.06304799914360046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,fp8,2047,0.4806511878967285
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,fp8,3,0.10511360168457032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,bfloat16,2047,0.7190911769866943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,bfloat16,7,0.09355040192604065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,fp8,7,0.10617920160293579
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,fp8,15,0.10527360439300537
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,bfloat16,15,0.09441120028495789
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,bfloat16,bfloat16,2047,0.017239999771118165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,bfloat16,31,0.09380639791488647
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,fp8,63,0.10930559635162354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,bfloat16,127,0.10622719526290894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,fp8,127,0.11405600309371948
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,bfloat16,255,0.1439039945602417
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,fp8,255,0.13957439661026
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,bfloat16,511,0.21816000938415528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,fp8,511,0.20210559368133546
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,bfloat16,1,0.14928799867630005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,bfloat16,1023,0.3895776033401489
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,fp8,1023,0.3007359981536865
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,fp8,1,0.17110879421234132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,bfloat16,3,0.1492079973220825
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,fp8,7,0.17112319469451903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,bfloat16,fp8,3,0.09396960139274597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,bfloat16,15,0.1490671992301941
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,fp8,15,0.17154879570007325
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,bfloat16,31,0.1503119945526123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,fp8,31,0.17310080528259278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,bfloat16,63,0.15653760433197023
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,fp8,63,0.18083360195159912
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,bfloat16,127,0.1657472014427185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,fp8,127,0.18618079423904418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,bfloat16,255,0.23738880157470704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,fp8,255,0.24070560932159424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,bfloat16,511,0.4019360065460205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,fp8,511,0.35658879280090333
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,1,0.011744000017642975
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,1,0.012505599856376648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,3,0.011715199798345566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,bfloat16,1023,0.7311872005462646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,7,0.01194560006260872
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,fp8,31,0.10578080415725707
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,bfloat16,bfloat16,63,0.10104800462722778
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,fp8,1023,0.5570432186126709
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,15,0.011791999638080596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,15,0.012700800597667695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,31,0.012468799948692322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,31,0.011985599994659424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,63,0.011963199824094772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,127,0.012012799829244613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,127,0.012671999633312225
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,255,0.013580800592899322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,255,0.014699199795722961
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,511,0.01666239947080612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,511,0.018297599256038667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,1023,0.01698080003261566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,1023,0.0182559996843338
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,2047,0.01855359971523285
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,2047,0.018643200397491455
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,4095,0.022086399793624877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,4095,0.022121599316596983
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,8191,0.040294399857521056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,8191,0.02948000133037567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,fp8,3,0.17062079906463623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,bfloat16,bfloat16,7,0.14724160432815553
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,16383,0.06443359851837158
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,16383,0.047958400845527646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,32767,0.10863200426101685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,1,0.012305600196123123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,65535,0.1245743989944458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,1,0.012428800016641617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,bfloat16,65535,0.1936400055885315
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,3,0.012089599668979645
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,7,0.011667200177907944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,3,0.012856000661849975
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,15,0.012100800126791
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,15,0.012464000284671784
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,7,0.012516799569129943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,31,0.012140800058841706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,63,0.011742399632930755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,63,0.01284639984369278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,127,0.0121568001806736
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,127,0.012891200184822083
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,255,0.013716800510883332
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,255,0.014788800477981567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,511,0.016889600455760954
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,511,0.018505600094795228
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,1023,0.01881919950246811
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,1023,0.018185600638389587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,2047,0.021006399393081666
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,2047,0.02224159985780716
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,4095,0.038206401467323306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,4095,0.028121599555015565
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,8191,0.06160640120506287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,8191,0.044675201177597046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,16383,0.10623999834060668
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,16383,0.07024319767951966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,32767,0.12071199417114258
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,32767,0.19258719682693481
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,3,0.012494400143623352
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,1,0.013313600420951843
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,1,0.014155200123786927
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,3,0.01334560066461563
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,7,0.012680000066757202
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,65535,0.21836960315704346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,7,0.013603200018405915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,bfloat16,65535,0.3676000118255615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,7,0.0143312007188797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,15,0.01361439973115921
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,63,0.01252799928188324
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,31,0.013513599336147309
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,31,0.014073599874973298
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,63,0.013387200236320496
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,63,0.014003199338912965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,127,0.013841600716114044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,127,0.01414719969034195
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,255,0.015107199549674988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,255,0.016364799439907075
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,511,0.01817920058965683
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,511,0.01987839937210083
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,1023,0.02329760044813156
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,1023,0.02344000041484833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,2047,0.03993119895458221
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,2047,0.030294400453567506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,4095,0.06144639849662781
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,4095,0.04415999948978424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,8191,0.10797920227050781
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,8191,0.07093600034713746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,16383,0.19853919744491577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,16383,0.12812960147857666
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,bfloat16,fp8,32767,0.07407839894294739
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,32767,0.37360320091247556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,32767,0.22528479099273682
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,bfloat16,fp8,31,0.012899200618267059
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,1,0.011503999680280685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,1,0.01226240023970604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,3,0.01154400035738945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,3,0.012291199713945388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,65535,0.7205023765563965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,7,0.011502400040626526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,15,0.011558400094509124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,7,0.012457600235939026
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,15,0.012388800084590913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,31,0.012361600250005721
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,63,0.011668799817562104
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,63,0.012383999675512314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,127,0.011670400202274323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,127,0.012296000123023986
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,255,0.013143999874591828
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,131071,0.8249152183532715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,255,0.014259199798107147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,511,0.018134400248527527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,511,0.016164800524711607
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,1023,0.01679680049419403
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,bfloat16,131071,1.4153375625610352
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,2047,0.01685439944267273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,1023,0.017977599799633027
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,4095,0.017153599858283998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,4095,0.018220800161361694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,2047,0.01807200014591217
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,8191,0.019254399836063384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,15,0.014044800400733947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,8191,0.01987839937210083
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,16383,0.023742400109767914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,32767,0.023940800130367278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,32767,0.023713600635528565
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,65535,0.027822399139404298
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,65535,0.02810400128364563
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,131071,0.04735519886016846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,1,0.011456000059843064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,1,0.01231520026922226
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,3,0.011430399864912033
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,3,0.012319999933242797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,7,0.011601600050926208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,7,0.012302400171756744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,15,0.01159999966621399
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,15,0.012358400225639343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,31,0.011574400216341018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,31,0.012332800030708312
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,63,0.011521600186824799
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,63,0.012462399899959564
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,127,0.011572799831628799
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,127,0.01233920007944107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,255,0.014337599277496338
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,511,0.016371199488639833
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,511,0.018177600204944612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,1023,0.016891199350357055
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,2047,0.016969600319862367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,2047,0.01825280040502548
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,4095,0.017164799571037292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,65535,0.42804641723632814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,4095,0.018220800161361694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,8191,0.019099199771881105
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,31,0.011664000153541566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,8191,0.020049600303173064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,16383,0.024081599712371827
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,16383,0.023496000468730925
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,32767,0.027382400631904603
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,32767,0.027081599831581114
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,65535,0.04341599941253662
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,65535,0.033199998736381534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,131071,0.06690559983253479
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,131071,0.04958080053329468
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,1,0.012608000636100769
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,1,0.013523200154304504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,3,0.012878400087356568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,3,0.013222399353981017
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,bfloat16,fp8,3,0.014257599413394929
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,7,0.012862400710582733
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,7,0.01321759968996048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,15,0.013716800510883332
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,31,0.012390399724245072
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,31,0.013687999546527862
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,63,0.01263359934091568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,63,0.013364799320697784
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,127,0.012678399682044983
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,bfloat16,16383,0.022416000068187714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,127,0.013713599741458892
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,255,0.01446399986743927
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,255,0.015494400262832641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,511,0.01727039963006973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,bfloat16,fp8,131071,0.03621279895305633
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,1023,0.019487999379634857
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,2047,0.022729599475860597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,4095,0.03989279866218567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,4095,0.031224000453948974
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,8191,0.06319680213928222
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,bfloat16,255,0.013099199533462525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,8191,0.04666399955749512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,16383,0.10994240045547485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,16383,0.07572799921035767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,bfloat16,fp8,1023,0.018163199722766876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,32767,0.12741119861602784
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,32767,0.19825760126113892
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,65535,0.3728912115097046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,65535,0.22659039497375488
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,1,0.011667200177907944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,1,0.012380799651145935
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,3,0.011640000343322753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,3,0.012307199835777282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,131071,0.7214911937713623
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,7,0.01159999966621399
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,131071,0.4209727764129639
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,15,0.011611200124025344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,7,0.01213119998574257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,31,0.011609599739313126
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,63,0.011611200124025344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,15,0.012444800138473511
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,31,0.012176000326871873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,63,0.012486399710178375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,127,0.012190400063991547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,127,0.01162400022149086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,255,0.014641599357128143
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,511,0.016062399744987486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,511,0.01825920045375824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,1023,0.0170864000916481
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,1023,0.018188799917697906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,15,0.012567999958992004
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,2047,0.017089599370956422
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,2047,0.01833759993314743
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,4095,0.017284800112247468
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,8191,0.020561599731445314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,4095,0.018196800351142885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,8191,0.020638400316238405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,16383,0.02696000039577484
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,16383,0.02685759961605072
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,32767,0.042484799027442934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,65535,0.06613439917564393
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,511,0.019147199392318726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,65535,0.04928799867630005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,bfloat16,1023,0.019815999269485473
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,131071,0.11140320301055909
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,1,0.014660799503326416
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,1,0.015537600219249725
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,131071,0.0770687997341156
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,bfloat16,fp8,2047,0.023284800350666046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,3,0.015316799283027649
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,7,0.014681600034236908
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,7,0.015806399285793304
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,15,0.015003199875354766
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,15,0.015607999265193939
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,31,0.015673600137233734
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,63,0.015483200550079346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,127,0.01496960073709488
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,127,0.015556800365447997
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,255,0.016625599563121797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,255,0.01780959963798523
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,511,0.01972319930791855
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,511,0.020937600731849672
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,1023,0.03888320028781891
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,1023,0.02869119942188263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,2047,0.05988479852676391
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,2047,0.04315199851989746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,4095,0.10390080213546753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,4095,0.06851040124893189
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,8191,0.19440640211105348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,8191,0.1199023962020874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,16383,0.22222719192504883
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,16383,0.36555359363555906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,32767,0.42107839584350587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,bfloat16,255,0.013131199777126313
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,32767,0.7136544227600098
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,bfloat16,1,0.020931200683116914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,fp8,1,0.02260800004005432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,bfloat16,fp8,32767,0.0335312008857727
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,bfloat16,3,0.020883199572563172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,bfloat16,7,0.020820799469947814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,fp8,3,0.022427199780941008
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,fp8,7,0.022651199996471406
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,bfloat16,15,0.020960000157356263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,fp8,15,0.02255360037088394
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,bfloat16,31,0.02096640020608902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,3,0.015831999480724335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,fp8,31,0.02245279997587204
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,31,0.015143999457359314
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,63,0.01512639969587326
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,bfloat16,127,0.021062399446964263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,fp8,127,0.02285120040178299
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,bfloat16,255,0.02460640072822571
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,fp8,255,0.026868799328804018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,fp8,65535,0.8096207618713379
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,fp8,511,0.034390398859977724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,bfloat16,1023,0.0670304000377655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,bfloat16,2047,0.10844160318374634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,fp8,2047,0.07429119944572449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,fp8,1023,0.05377119779586792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,bfloat16,bfloat16,65535,1.4112079620361329
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,fp8,4095,0.12523839473724366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,bfloat16,4095,0.19367680549621583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,bfloat16,8191,0.36839520931243896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,fp8,8191,0.24261119365692138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,bfloat16,1,0.03388000130653381
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,fp8,1,0.037049600481987
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,bfloat16,3,0.03419679999351501
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,bfloat16,7,0.0338239997625351
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,fp8,3,0.03703359961509704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,fp8,16383,0.42139039039611814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,fp8,7,0.036929601430892946
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,bfloat16,15,0.03407360017299652
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,bfloat16,16383,0.7141551971435547
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,bfloat16,31,0.033792001008987424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,fp8,15,0.03705120086669922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,fp8,63,0.03687039911746979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,bfloat16,63,0.03357439935207367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,bfloat16,127,0.03461759984493255
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,fp8,127,0.03684319853782654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,fp8,255,0.04527359902858734
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,bfloat16,255,0.051425600051879884
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,bfloat16,511,0.07270560264587403
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,fp8,511,0.06219519972801209
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,fp8,1023,0.08866400122642518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,bfloat16,1023,0.11848479509353638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,bfloat16,2047,0.20214400291442872
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,fp8,2047,0.1366976022720337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,bfloat16,4095,0.37379839420318606
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,fp8,4095,0.23871359825134278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,1,0.011952000111341477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,bfloat16,63,0.02110079973936081
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,3,0.011870399862527848
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,fp8,63,0.02282720059156418
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,1,0.012596799433231354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,3,0.012521600723266602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,15,0.011804799735546111
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,7,0.012503999471664428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,15,0.012451200187206269
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,31,0.011761599779129028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,31,0.012511999905109405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,63,0.011798399686813354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,bfloat16,bfloat16,511,0.04253279864788055
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,63,0.01250080019235611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,127,0.01178240031003952
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,127,0.012513600289821625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,bfloat16,8191,0.7200607776641845
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,255,0.014502400159835815
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,255,0.01340479999780655
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,fp8,8191,0.43325119018554686
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,511,0.017950400710105896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,1023,0.018326400220394133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,2047,0.017528000473976135
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,4095,0.018939200043678283
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,1023,0.016873599588871004
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,4095,0.01895039975643158
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,2047,0.01804639995098114
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,8191,0.024404799938201903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,8191,0.024243199825286867
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,16383,0.03367680013179779
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,16383,0.04298399984836578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,32767,0.050836801528930664
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,32767,0.06633920073509217
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,65535,0.11072959899902343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,65535,0.07811359763145446
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,bfloat16,1,0.055113601684570315
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,131071,0.20008161067962646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,fp8,131071,0.13142080307006837
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,bfloat16,3,0.05504159927368164
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,bfloat16,7,0.05497599840164184
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,fp8,3,0.05972639918327331
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,bfloat16,15,0.05517280101776123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,fp8,15,0.05952960252761841
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,bfloat16,31,0.0555184006690979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,fp8,31,0.059759998321533205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,bfloat16,63,0.05589119791984558
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,bfloat16,fp8,31,0.037110400199890134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,fp8,63,0.059673601388931276
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,bfloat16,127,0.06240479946136475
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,fp8,127,0.06253759860992432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,bfloat16,255,0.08363839983940125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,fp8,255,0.07851679921150208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,bfloat16,511,0.12739200592041017
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,bfloat16,1023,0.21352159976959229
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,fp8,1023,0.1622447967529297
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,bfloat16,2047,0.3806272029876709
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,fp8,2047,0.25799999237060545
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,7,0.011750400066375732
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,bfloat16,1,0.09482719898223876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,bfloat16,bfloat16,511,0.01640480011701584
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,fp8,1,0.10656160116195679
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,fp8,4095,0.4575520038604736
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,bfloat16,3,0.09395040273666382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,fp8,3,0.10641759634017944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,bfloat16,4095,0.7246511936187744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,bfloat16,7,0.09473440051078796
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,bfloat16,15,0.09490879774093627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,bfloat16,31,0.09476320147514343
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,fp8,31,0.10707999467849731
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,bfloat16,63,0.10083520412445068
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,fp8,63,0.10960960388183594
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,fp8,1,0.06122080087661743
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,fp8,7,0.06068320274353027
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,fp8,127,0.1144927978515625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,bfloat16,255,0.14355360269546508
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,fp8,255,0.14215680360794067
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,bfloat16,511,0.22006239891052246
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,fp8,511,0.20254080295562743
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,bfloat16,1023,0.38928799629211425
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,fp8,1023,0.306494402885437
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,bfloat16,fp8,511,0.10766079425811767
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,1,0.01194560006260872
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,1,0.012468799948692322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,3,0.01186719983816147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,3,0.012318400293588638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,7,0.011860799789428712
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,7,0.012305600196123123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,15,0.012008000165224075
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,bfloat16,2047,0.7259200096130372
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,15,0.012326399981975555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,31,0.011987199634313583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,63,0.011795199662446975
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,63,0.012729600071907043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,31,0.012415999919176102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,127,0.011851199716329575
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,127,0.012768000364303589
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,fp8,2047,0.49660959243774416
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,255,0.01465280055999756
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,511,0.016680000722408293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,255,0.01345600038766861
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,1023,0.018606400489807128
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,2047,0.01913439929485321
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,511,0.01833920031785965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,2047,0.01895039975643158
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,4095,0.022566400468349457
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,4095,0.02240640074014664
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,8191,0.04082399904727936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,16383,0.06549440026283264
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,16383,0.04876640141010284
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,32767,0.10958880186080933
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,32767,0.07524480223655701
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,65535,0.12632160186767577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,65535,0.1958367943763733
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,fp8,7,0.10529279708862305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,fp8,15,0.10687199831008912
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,131071,0.22608799934387208
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,bfloat16,bfloat16,127,0.10597120523452759
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,1023,0.017492799460887908
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,fp8,8191,0.029977598786354066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,bfloat16,bfloat16,131071,0.37151360511779785
