framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,1,0.012939199805259705
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,1,0.012238399684429168
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,1,0.012094400078058242
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,1,0.012164799869060517
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,1,0.012139199674129486
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,1,0.012027200311422348
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,1,0.01268800050020218
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,1,0.018651199340820313
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,1,0.017948800325393678
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,1,0.018001599609851836
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,1,0.018321600556373597
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,1,0.017924800515174866
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,1,0.01802079975605011
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,1,0.017907199263572694
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,3,0.013014400005340576
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,3,0.01242239996790886
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,3,0.012129600346088409
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,3,0.012380799651145935
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,3,0.012080000340938568
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,3,0.012292800098657608
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,3,0.012142399698495865
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,3,0.018739199638366698
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,3,0.01823199987411499
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,3,0.01803999990224838
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,3,0.018188799917697906
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,3,0.018104000389575957
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,3,0.017875200510025023
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,3,0.01815840005874634
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,7,0.01284320056438446
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,7,0.012583999335765839
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,7,0.012095999717712403
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,7,0.012302400171756744
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,7,0.012270399928092956
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,7,0.012163200229406358
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,7,0.018142400681972502
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,7,0.018883199989795686
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,7,0.01804479956626892
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,7,0.017987200617790224
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,7,0.018036800622940063
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,7,0.01788319945335388
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,7,0.018129600584506987
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,15,0.01300320029258728
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,15,0.012678399682044983
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,15,0.012368000298738479
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,15,0.012297599762678146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,15,0.01218079999089241
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,15,0.012451200187206269
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,15,0.018801599740982056
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,15,0.012169600278139115
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,15,0.018452799320220946
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,15,0.018193599581718446
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,15,0.018036800622940063
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,15,0.01802079975605011
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,15,0.018147200345993042
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,15,0.017931200563907623
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,31,0.01313440054655075
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,31,0.012723200023174286
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,31,0.012583999335765839
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,31,0.012399999797344208
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,31,0.01852640062570572
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,31,0.012294399738311767
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,31,0.01825920045375824
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,31,0.017953599989414214
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,31,0.018223999440670012
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,31,0.017948800325393678
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,31,0.01828320026397705
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,63,0.01318880021572113
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,63,0.012566399574279786
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,63,0.012425599992275238
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,63,0.012193600088357926
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,63,0.01244800016283989
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,63,0.01886879950761795
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,63,0.018535999953746794
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,63,0.018156799674034118
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,63,0.018310399353504182
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,63,0.018212799727916718
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,63,0.018193599581718446
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,63,0.018369600176811218
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,127,0.01467519998550415
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,127,0.014552000164985656
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,127,0.014057600498199463
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,127,0.014075200259685516
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,127,0.014019200205802917
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,127,0.0141184002161026
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,127,0.014035199582576752
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,127,0.020681600272655486
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,127,0.020183999836444855
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,127,0.020075200498104094
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,127,0.019841599464416503
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,127,0.019760000705718993
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,127,0.01998720020055771
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,127,0.019865599274635316
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,255,0.01797119975090027
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,255,0.017310400307178498
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,255,0.0173552006483078
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,255,0.017123199999332428
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,255,0.01722559928894043
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,255,0.016979199647903443
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,255,0.017185600101947786
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,255,0.02388000041246414
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,255,0.02314720004796982
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,255,0.023095999658107758
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,255,0.02288320064544678
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,255,0.022785599529743194
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,255,0.02282399982213974
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,255,0.022779199481010436
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,511,0.018812799453735353
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,511,0.017686399817466735
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,511,0.016894400119781494
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,511,0.016515199840068818
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,511,0.017627200484275816
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,511,0.017820799350738527
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,511,0.017791999876499175
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,511,0.02499680072069168
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,511,0.023792000114917757
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,511,0.023104000091552734
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,511,0.022700800001621245
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,511,0.02383680045604706
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,511,0.0241007998585701
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,511,0.023956799507141115
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,1023,0.019108800590038298
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,1023,0.017995199561119078
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,1023,0.017108799517154695
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,1023,0.016791999340057373
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,1023,0.017740799486637114
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,1023,0.018025599420070648
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,7,0.01223519966006279
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,1023,0.01780160069465637
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,1023,0.024340799450874327
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,1023,0.023651200532913207
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,1023,0.023182399570941925
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,1023,0.025491198897361754
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,1023,0.024225600063800812
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,1023,0.02431199997663498
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,1023,0.024348799884319306
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,2047,0.019891199469566346
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,2047,0.018681600689888
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,2047,0.017735999822616578
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,2047,0.017334400117397307
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,2047,0.017791999876499175
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,2047,0.018080000579357148
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,2047,0.01799360066652298
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,2047,0.028271999955177308
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,2047,0.02720000147819519
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,2047,0.02606239914894104
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,2047,0.026425600051879883
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,2047,0.02632479965686798
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,2047,0.026471999287605286
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,2047,0.026449599862098695
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,4095,0.02255679965019226
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,4095,0.020585599541664123
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,4095,0.019556799530982973
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,4095,0.01987680047750473
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,4095,0.020448000729084016
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,4095,0.020440000295639037
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,4095,0.02025440037250519
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,4095,0.03359839916229248
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,4095,0.032041600346565245
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,4095,0.03112800121307373
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,4095,0.03067359924316406
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,4095,0.031760001182556154
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,4095,0.031748801469802856
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,4095,0.03166559934616089
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,8191,0.025323200225830077
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,8191,0.024003200232982635
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,8191,0.022150400280952453
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,8191,0.02242400050163269
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,8191,0.02390879988670349
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,8191,0.024302400648593903
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,8191,0.02412640005350113
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,8191,0.043793600797653195
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,8191,0.04061599969863892
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,8191,0.039526399970054624
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,8191,0.03923200070858002
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,8191,0.04084160029888153
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,8191,0.04123519957065582
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,8191,0.04116320013999939
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,16383,0.029660800099372865
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,16383,0.026825600862503053
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,16383,0.026075199246406555
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,16383,0.025678399205207824
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,16383,0.026609599590301514
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,16383,0.026151999831199646
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,16383,0.025860801339149475
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,16383,0.06614720225334167
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,16383,0.060969597101211546
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,16383,0.056704002618789676
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,16383,0.056668800115585324
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,16383,0.055979198217391966
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,16383,0.05712800025939942
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,16383,0.05607680082321167
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,32767,0.04256319999694824
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,32767,0.03858560025691986
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,32767,0.033188799023628236
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,32767,0.03083840012550354
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,32767,0.0313616007566452
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,32767,0.030907198786735535
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,32767,0.031112000346183777
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,32767,0.10214879512786865
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,32767,0.0995136022567749
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,32767,0.09820640087127686
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,32767,0.0981935977935791
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,32767,0.09765599966049195
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,32767,0.09734560251235962
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,32767,0.09744160175323487
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,65535,0.05661119818687439
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,65535,0.05413920283317566
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,65535,0.04951040148735046
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,65535,0.05067520141601563
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,65535,0.05197759866714478
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,65535,0.049235200881958006
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,65535,0.0502128005027771
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,65535,0.17446399927139283
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,65535,0.17250399589538573
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,65535,0.17010719776153566
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,65535,0.1710592031478882
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,65535,0.16937439441680907
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,65535,0.1702415943145752
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,65535,0.16965759992599488
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,131071,0.0875760018825531
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,131071,0.08127679824829101
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,131071,0.07842559814453125
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,131071,0.07509599924087525
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,131071,0.07640479803085327
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,131071,0.07484319806098938
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,131071,0.07592960000038147
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,131071,0.3160655975341797
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,131071,0.31415839195251466
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,131071,0.3110975980758667
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,131071,0.3115695953369141
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,63,0.0123648002743721
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,63,0.012222400307655335
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,131071,0.30954880714416505
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,31,0.012238399684429168
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,31,0.012163200229406358
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,131071,0.3117536067962646
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,1,0.012804800271987915
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,1,0.012379200011491776
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,31,0.018926399946212768
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,1,0.012246400117874146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,1,0.012283200025558471
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,1,0.012083200365304947
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,1,0.012080000340938568
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,1,0.012095999717712403
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,1,0.01860480010509491
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,1,0.01863040030002594
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,1,0.017531199753284453
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,1,0.017847999930381775
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,1,0.017795200645923614
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,3,0.012942400574684144
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,1,0.01759520024061203
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,3,0.012342400103807449
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,1,0.01764000058174133
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,3,0.012328000366687774
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,3,0.01223680004477501
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,3,0.012108799815177918
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,131071,0.3098383903503418
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,3,0.012038400024175644
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,3,0.012417600303888322
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,3,0.018702399730682374
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,3,0.017657600343227386
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,3,0.017531199753284453
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,3,0.01785759925842285
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,3,0.01764959990978241
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,3,0.017827199399471284
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,7,0.013067199289798737
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,7,0.012489599734544754
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,3,0.01758880019187927
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,7,0.012062399834394454
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,7,0.012241599708795547
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,7,0.012123200297355651
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,7,0.01233920007944107
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,7,0.012243200093507767
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,7,0.01839520037174225
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,7,0.017772799730300902
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,7,0.017843200266361235
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,7,0.017635199427604675
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,7,0.01789119988679886
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,7,0.017771199345588684
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,7,0.017774400115013123
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,15,0.012936000525951386
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,15,0.0124208003282547
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,15,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,15,0.012303999811410903
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,15,0.012297599762678146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,15,0.012305600196123123
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,15,0.01228799968957901
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,15,0.018611200153827667
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,15,0.018060800433158875
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,15,0.018052799999713896
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,15,0.017867200076580048
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,15,0.017603200674057008
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,15,0.017903999984264375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,15,0.017953599989414214
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,31,0.012915199995040894
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,31,0.01252640038728714
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,31,0.012375999987125397
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,31,0.01242239996790886
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,31,0.012099199742078782
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,31,0.012254399806261062
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,31,0.012283200025558471
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,31,0.018535999953746794
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,31,0.018110400438308714
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,31,0.017876799404621124
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,31,0.01788640022277832
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,31,0.017723199725151063
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,31,0.017803199589252472
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,31,0.01772159934043884
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,63,0.012665599584579468
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,63,0.013120000064373017
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,63,0.012439999729394913
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,63,0.012201599776744843
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,63,0.01231520026922226
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,63,0.012331199645996094
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,63,0.01223360002040863
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,63,0.018411199748516082
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,63,0.018350400030612946
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,63,0.018119999766349794
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,63,0.01791680008172989
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,63,0.01772159934043884
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,63,0.017987200617790224
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,63,0.017800000309944154
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,127,0.014873600006103516
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,127,0.014302399754524232
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,127,0.013935999572277069
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,127,0.01408800035715103
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,127,0.01398559957742691
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,127,0.013915200531482697
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,127,0.01388320028781891
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,127,0.01976799964904785
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,127,0.019491200149059296
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,127,0.020529599487781526
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,127,0.019648000597953796
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,127,0.019524799287319185
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,127,0.019495999813079833
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,255,0.01788640022277832
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,127,0.019523200392723084
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,255,0.0171424001455307
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,255,0.017392000555992125
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,255,0.017203199863433837
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,255,0.017025600373744964
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,255,0.017059199512004852
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,255,0.01720480024814606
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,255,0.02359839975833893
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,255,0.022907200455665588
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,255,0.023019200563430785
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,255,0.022881600260734557
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,255,0.022758400440216063
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,255,0.02287680059671402
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,255,0.022987200319766997
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,511,0.017766399681568144
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,511,0.017097599804401398
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,511,0.017910400032997133
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,511,0.025179201364517213
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,511,0.024051199853420257
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,511,0.018955199420452117
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,511,0.016950400173664094
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,511,0.023668800294399262
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,511,0.02407840043306351
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,511,0.018246400356292724
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,511,0.02285760045051575
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,511,0.01822720021009445
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,511,0.02433760017156601
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,511,0.024191999435424806
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,1023,0.019406400620937347
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,1023,0.018265600502490997
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,1023,0.01743679940700531
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,1023,0.017188799381256104
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,1023,0.01815039962530136
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,1023,0.018248000741004945
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,1023,0.01814880073070526
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,1023,0.02646239995956421
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,1023,0.02786239981651306
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,1023,0.02569279968738556
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,1023,0.025254398584365845
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,1023,0.026015999913215637
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,1023,0.02629759907722473
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,1023,0.025911998748779298
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,2047,0.020503999292850496
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,2047,0.01913280040025711
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,2047,0.01844319999217987
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,2047,0.01825920045375824
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,2047,0.01845120042562485
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,2047,0.018598400056362152
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,2047,0.01846559941768646
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,2047,0.0319568008184433
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,2047,0.02924320101737976
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,2047,0.030027198791503906
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,2047,0.029153600335121155
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,2047,0.02941280007362366
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,2047,0.0293503999710083
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,2047,0.02914400100708008
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,4095,0.022867199778556824
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,4095,0.020870399475097657
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,4095,0.019683200120925903
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,4095,0.020311999320983886
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,4095,0.020502400398254395
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,4095,0.02033119946718216
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,4095,0.02072319984436035
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,4095,0.04072319865226746
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,4095,0.03817279934883118
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,4095,0.03658719956874847
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,4095,0.03747679889202118
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,4095,0.03774240016937256
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,4095,0.03758879899978638
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,4095,0.03803200125694275
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,8191,0.02362399995326996
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,8191,0.022499200701713563
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,8191,0.023094399273395537
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,8191,0.028004801273345946
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,8191,0.02513279914855957
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,8191,0.025791999697685242
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,8191,0.0255295991897583
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,8191,0.05769439935684204
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,8191,0.06325119733810425
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,8191,0.05312960147857666
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,8191,0.05602560043334961
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,8191,0.05461440086364746
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,8191,0.05533440113067627
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,8191,0.055553597211837766
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,16383,0.039259201288223265
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,16383,0.029254400730133058
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,16383,0.03497599959373474
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,16383,0.029254400730133058
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,16383,0.030031999945640563
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,16383,0.02980639934539795
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,16383,0.027244800329208375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,16383,0.09433599710464477
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,16383,0.09922080039978028
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,16383,0.0960319995880127
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,16383,0.09679679870605469
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,16383,0.09484480023384094
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,16383,0.09596480131149292
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,16383,0.09656479954719543
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,32767,0.05434880256652832
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,32767,0.04863840043544769
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,32767,0.04981760084629059
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,32767,0.04788320064544678
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,32767,0.048582398891448976
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,32767,0.0482448011636734
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,32767,0.04644320011138916
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,32767,0.17259039878845214
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,32767,0.16706399917602538
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,32767,0.16974400281906127
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,32767,0.16669280529022218
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,32767,0.16947519779205322
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,32767,0.16956640481948854
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,32767,0.1694591999053955
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,65535,0.08243520259857177
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,65535,0.0771120011806488
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,65535,0.07407839894294739
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,65535,0.07236800193786622
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,65535,0.07215999960899352
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,65535,0.07433599829673768
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,65535,0.07393119931221008
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,65535,0.3136415958404541
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,65535,0.31082720756530763
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,65535,0.3080559968948364
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,65535,0.30734078884124755
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,65535,0.3095983982086182
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,65535,0.30820479393005373
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,131071,0.13224320411682128
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,65535,0.308734393119812
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,131071,0.12672319412231445
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,131071,0.12440799474716187
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,131071,0.12247359752655029
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,131071,0.12281279563903809
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,131071,0.12343519926071167
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,131071,0.12350720167160034
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,131071,0.5880640029907227
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,131071,0.5844511985778809
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,131071,0.5801280021667481
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,131071,0.5831759929656982
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,131071,0.580624008178711
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,131071,0.5814288139343262
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,1,0.013121600449085235
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,1,0.012380799651145935
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,1,0.012292800098657608
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,1,0.012227199971675873
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,1,0.012345600128173827
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,1,0.012144000083208085
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,1,0.01228479966521263
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,1,0.018878400325775146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,1,0.018214400112628936
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,1,0.018195199966430663
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,1,0.017892800271511078
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,1,0.018012799322605133
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,1,0.018007999658584593
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,3,0.013092799484729767
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,1,0.01793439984321594
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,3,0.012489599734544754
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,3,0.0123648002743721
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,3,0.01220960021018982
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,3,0.012291199713945388
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,3,0.012206400185823441
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,3,0.012169600278139115
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,3,0.018783999979496
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,3,0.018427200615406036
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,131071,0.5787919998168946
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,3,0.01807200014591217
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,3,0.017977599799633027
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,3,0.01789119988679886
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,3,0.01812479943037033
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,3,0.01791999936103821
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,7,0.013038399815559387
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,7,0.01236959993839264
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,7,0.012596799433231354
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,7,0.01223680004477501
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,7,0.012225600332021714
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,7,0.012303999811410903
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,7,0.012142399698495865
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,7,0.018343999981880188
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,7,0.019308799505233766
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,7,0.018024000525474548
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,7,0.01783200055360794
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,7,0.01791999936103821
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,7,0.017998400330543517
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,7,0.01807200014591217
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,15,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,15,0.012238399684429168
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,15,0.013262400031089782
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,15,0.012297599762678146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,15,0.012323199957609176
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,15,0.012334399670362473
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,15,0.012303999811410903
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,15,0.018705600500106813
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,15,0.018379199504852294
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,15,0.01820479929447174
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,15,0.01804639995098114
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,15,0.0180976003408432
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,15,0.018007999658584593
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,15,0.01799200028181076
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,31,0.013120000064373017
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,31,0.01242239996790886
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,31,0.01284160017967224
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,31,0.012272000312805176
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,31,0.012334399670362473
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,31,0.012116800248622894
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,31,0.012244799733161926
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,31,0.018862399458885192
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,31,0.018089599907398224
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,31,0.018459199368953703
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,31,0.018028800189495087
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,31,0.01789119988679886
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,31,0.01793439984321594
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,31,0.01802240014076233
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,63,0.01313759982585907
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,63,0.012751999497413635
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,63,0.012129600346088409
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,63,0.012556800246238708
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,63,0.012300799787044524
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,63,0.01226079985499382
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,63,0.018878400325775146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,63,0.012374400347471236
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,63,0.018400000035762788
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,63,0.018279999494552612
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,63,0.018160000443458557
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,63,0.018156799674034118
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,63,0.0180976003408432
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,127,0.014793600142002105
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,127,0.014449599385261535
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,63,0.01815200001001358
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,127,0.01430719941854477
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,127,0.01403840035200119
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,127,0.01408800035715103
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,127,0.014043200016021728
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,127,0.014129599928855896
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,127,0.02016319930553436
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,127,0.020776000618934632
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,127,0.020099200308322906
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,127,0.019836799800395967
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,127,0.019835199415683746
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,127,0.019993600249290467
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,127,0.019926400482654573
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,255,0.01802240014076233
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,255,0.017441600561141968
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,255,0.01717599928379059
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,255,0.01720159947872162
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,255,0.017083199322223665
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,255,0.017235200107097625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,255,0.01723040044307709
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,255,0.024022400379180908
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,255,0.023324799537658692
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,255,0.02346239984035492
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,255,0.023214399814605713
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,255,0.02343039959669113
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,255,0.023337599635124207
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,511,0.019225600361824035
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,511,0.01738079935312271
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,511,0.018163199722766876
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,255,0.023219199478626253
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,511,0.01703840047121048
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,511,0.018219199776649476
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,511,0.018113599717617036
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,511,0.018212799727916718
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,511,0.02773280143737793
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,511,0.026759999990463256
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,511,0.02571359872817993
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,511,0.025465598702430724
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,511,0.026153600215911864
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,511,0.026609599590301514
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,511,0.02640320062637329
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,1023,0.020598399639129638
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,1023,0.018415999412536622
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,1023,0.017744000256061553
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,1023,0.01770240068435669
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,1023,0.01812800019979477
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,1023,0.018268799781799315
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,1023,0.018352000415325163
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,1023,0.02964319884777069
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,1023,0.0312032014131546
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,1023,0.028676798939704894
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,1023,0.028411200642585753
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,1023,0.029091200232505797
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,1023,0.02941280007362366
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,2047,0.020105600357055664
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,2047,0.01886720061302185
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,2047,0.02311519980430603
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,1023,0.02945919930934906
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,2047,0.018719999492168425
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,2047,0.019092799723148347
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,2047,0.019492800533771514
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,2047,0.019571200013160706
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,2047,0.04002079963684082
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,2047,0.03729279935359955
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,2047,0.035678398609161374
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,2047,0.036740800738334654
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,2047,0.03634240031242371
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,2047,0.03617120087146759
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,2047,0.0360368013381958
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,4095,0.02670240104198456
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,4095,0.022673599421977997
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,4095,0.021819199621677398
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,4095,0.021356800198554994
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,4095,0.022331200540065765
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,4095,0.022675199806690215
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,4095,0.023044799268245698
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,4095,0.056120002269744874
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,4095,0.06133599877357483
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,4095,0.053345602750778195
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,4095,0.051937597990036014
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,4095,0.05208160281181336
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,4095,0.05257440209388733
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,4095,0.05320159792900085
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,8191,0.03746080100536346
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,8191,0.028417599201202393
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,8191,0.033211201429367065
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,8191,0.026521599292755126
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,8191,0.030167999863624572
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,8191,0.029635199904441835
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,8191,0.029625600576400755
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,8191,0.09369760155677795
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,8191,0.09755520224571228
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,8191,0.09242879748344421
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,8191,0.09264960289001464
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,8191,0.09559360146522522
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,8191,0.09659839868545532
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,8191,0.09662560224533082
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,16383,0.04806079864501953
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,16383,0.05459520220756531
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,16383,0.04652960002422333
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,16383,0.04822559952735901
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,16383,0.04533439874649048
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,16383,0.04895040094852447
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,16383,0.049211201071739194
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,16383,0.1668496012687683
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,16383,0.1654960036277771
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,16383,0.17122880220413209
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,16383,0.16484800577163697
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,16383,0.16852159500122071
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,16383,0.16920479536056518
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,16383,0.16882239580154418
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,32767,0.08213919997215272
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,32767,0.0762224018573761
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,32767,0.07334880232810974
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,32767,0.07126240134239196
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,32767,0.07378079891204833
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,32767,0.07422879934310914
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,32767,0.07431520223617553
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,32767,0.3116591930389404
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,32767,0.307806396484375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,32767,0.3057487964630127
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,32767,0.3050271987915039
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,32767,0.307476806640625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,32767,0.30763840675354004
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,32767,0.30856320858001707
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,65535,0.13086559772491455
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,65535,0.12488640546798706
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,65535,0.12167520523071289
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,65535,0.1180448055267334
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,65535,0.12066559791564942
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,65535,0.12342079877853393
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,65535,0.1224511981010437
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,65535,0.5854479789733886
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,65535,0.580622386932373
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,65535,0.5785376071929932
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,65535,0.577672004699707
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,65535,0.5799407958984375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,65535,0.5805344104766845
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,131071,0.2226032018661499
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,131071,0.22838239669799804
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,131071,0.21989600658416747
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,131071,0.21919360160827636
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,131071,0.2209712028503418
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,65535,0.5803040027618408
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,131071,0.2216111898422241
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,131071,0.22145919799804686
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,131071,1.12609920501709
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,131071,1.129857635498047
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,131071,1.1234047889709473
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,131071,1.1223391532897948
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,1,0.013124799728393555
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,131071,1.1221440315246582
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,1,0.012664000689983367
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,1,0.012291199713945388
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,1,0.012328000366687774
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,1,0.012401600182056428
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,1,0.012191999703645706
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,131071,1.1234560012817383
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,1,0.012415999919176102
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,1,0.019153599441051484
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,1,0.018545599281787874
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,1,0.018292799592018127
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,1,0.018408000469207764
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,1,0.018294399976730345
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,1,0.018212799727916718
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,1,0.018131199479103088
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,3,0.013059200346469879
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,3,0.012347199767827988
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,3,0.012673600018024445
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,3,0.012484800070524216
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,3,0.012222400307655335
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,3,0.012449599802494049
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,3,0.012371200323104858
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,3,0.0191551998257637
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,3,0.01842080056667328
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,3,0.01844639927148819
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,3,0.018427200615406036
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,3,0.01820800006389618
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,3,0.018379199504852294
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,7,0.01308799982070923
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,3,0.0180976003408432
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,7,0.012656000256538392
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,7,0.01257600039243698
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,7,0.012404800206422806
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,7,0.012374400347471236
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,7,0.012383999675512314
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,7,0.012318400293588638
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,131071,1.1284159660339355
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,7,0.018639999628067016
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,7,0.01892320066690445
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,7,0.0183119997382164
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,7,0.018108800053596497
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,7,0.018246400356292724
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,7,0.01826400011777878
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,15,0.012827199697494508
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,15,0.013212800025939941
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,15,0.012435200065374375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,15,0.01234079971909523
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,7,0.018292799592018127
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,15,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,15,0.012363199889659882
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,15,0.012467200309038163
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,15,0.019099199771881105
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,15,0.018675200641155243
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,15,0.018305599689483643
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,15,0.01836639940738678
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,15,0.01865600049495697
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,15,0.018300800025463103
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,15,0.018328000605106354
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,31,0.013078400492668152
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,31,0.012724800407886505
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,31,0.01260959953069687
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,31,0.012612800300121307
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,31,0.012401600182056428
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,31,0.012436799705028534
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,31,0.012409599870443344
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,31,0.019100800156593323
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,31,0.018716800212860107
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,31,0.018691200017929076
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,31,0.018612800538539885
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,31,0.01844480037689209
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,31,0.01823039948940277
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,31,0.01831679940223694
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,63,0.013415999710559845
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,63,0.012833599746227265
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,63,0.012680000066757202
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,63,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,63,0.012540799379348756
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,63,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,63,0.012585599720478059
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,63,0.019336000084877014
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,63,0.018852800130844116
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,63,0.018911999464035035
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,63,0.018639999628067016
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,63,0.0186271995306015
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,63,0.018675200641155243
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,63,0.018535999953746794
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,127,0.015072000026702882
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,127,0.01419840008020401
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,127,0.014248000085353851
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,127,0.014646400511264802
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,127,0.014256000518798828
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,127,0.014206400513648987
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,127,0.014286400377750396
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,127,0.021414400637149812
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,127,0.020766399800777435
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,127,0.02056799978017807
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,127,0.020496000349521638
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,127,0.020659199357032774
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,127,0.0204815998673439
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,255,0.01802240014076233
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,255,0.01759999990463257
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,255,0.017284800112247468
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,127,0.020099200308322906
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,255,0.017419199645519256
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,255,0.017385600507259368
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,255,0.01737920045852661
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,255,0.017287999391555786
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,255,0.02605920135974884
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,255,0.025548800826072693
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,255,0.026063999533653258
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,255,0.02547520101070404
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,255,0.025412800908088683
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,255,0.025275200605392456
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,255,0.025339201092720032
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,511,0.02054239958524704
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,511,0.018353599309921264
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,511,0.017825600504875184
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,511,0.017528000473976135
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,511,0.018147200345993042
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,511,0.018649600446224213
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,511,0.018568000197410582
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,511,0.03167519867420197
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,511,0.02982879877090454
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,511,0.02908160090446472
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,511,0.028603199124336242
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,511,0.02964319884777069
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,511,0.029688000679016113
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,511,0.030209600925445557
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,1023,0.023414400219917298
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,1023,0.019617600739002226
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,1023,0.018156799674034118
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,1023,0.018665599822998046
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,1023,0.01977279931306839
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,1023,0.019169600307941438
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,1023,0.019289599359035493
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,1023,0.040300801396369934
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,1023,0.03716480135917664
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,1023,0.03564639985561371
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,1023,0.035913598537445066
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,1023,0.03611040115356445
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,1023,0.03638879954814911
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,1023,0.03680480122566223
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,2047,0.0271263986825943
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,2047,0.021699200570583343
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,2047,0.02096959948539734
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,2047,0.020193600654602052
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,2047,0.021118399500846863
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,2047,0.021080000698566435
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,2047,0.021481600403785706
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,2047,0.0613103985786438
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,2047,0.055339199304580686
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,2047,0.051755201816558835
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,2047,0.05110560059547424
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,2047,0.05039520263671875
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,2047,0.051313602924346925
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,2047,0.051583999395370485
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,4095,0.026998400688171387
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,4095,0.036627200245857236
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,4095,0.032016000151634215
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,4095,0.02537600100040436
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,4095,0.02715040147304535
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,4095,0.02682879865169525
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,4095,0.027272000908851624
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,4095,0.09630879759788513
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,4095,0.09247999787330627
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,4095,0.09360160231590271
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,4095,0.09172639846801758
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,4095,0.09318559765815734
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,4095,0.09280480146408081
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,4095,0.09338240027427673
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,8191,0.05283839702606201
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,8191,0.04558559954166412
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,8191,0.04442720115184784
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,8191,0.04743840098381043
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,8191,0.04825119972229004
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,8191,0.04880000054836273
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,8191,0.048430401086807254
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,8191,0.1659775972366333
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,8191,0.17038079500198364
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,8191,0.16600799560546875
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,8191,0.16777119636535645
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,8191,0.16360479593276978
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,8191,0.16963839530944824
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,8191,0.1685328006744385
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,16383,0.08131359815597534
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,16383,0.07523999810218811
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,16383,0.07190560102462769
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,16383,0.06900799870491028
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,16383,0.07406880259513855
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,16383,0.07287520170211792
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,16383,0.07422239780426025
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,16383,0.3081056118011475
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,16383,0.3122688055038452
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,16383,0.30596160888671875
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,16383,0.3032047986984253
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,16383,0.3069024085998535
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,32767,0.13055200576782228
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,32767,0.12391200065612792
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,16383,0.3093775987625122
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,32767,0.12090879678726196
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,16383,0.30887041091918943
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,32767,0.1188431978225708
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,32767,0.12016160488128662
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,32767,0.12284480333328247
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,32767,0.12301759719848633
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,32767,0.5879871845245361
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,32767,0.5806623935699463
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,32767,0.5771488189697266
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,32767,0.5759007930755615
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,32767,0.5801296234130859
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,65535,0.22810559272766112
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,65535,0.2169264078140259
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,65535,0.21883680820465087
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,32767,0.5831264019012451
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,32767,0.5800975799560547
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,65535,0.21684958934783935
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,65535,0.2203455924987793
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,65535,0.22146399021148683
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,65535,0.22143359184265138
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,65535,1.1294063568115233
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,65535,1.1198224067687987
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,65535,1.1216336250305177
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,65535,1.121009635925293
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,65535,1.1248576164245605
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,131071,0.41327199935913084
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,131071,0.41854238510131836
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,131071,0.4154047966003418
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,65535,1.124608039855957
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,131071,0.4133920192718506
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,65535,1.1245887756347657
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,131071,0.4081295967102051
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,131071,0.41795997619628905
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,131071,0.41776161193847655
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,131071,2.217001533508301
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,131071,2.2112607955932617
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,1,0.014228799939155578
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,131071,2.2119472503662108
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,131071,2.2070655822753906
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,1,0.012777599692344665
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,131071,2.2196687698364257
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,1,0.012510399520397186
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,131071,2.2201583862304686
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,1,0.012587200105190276
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,1,0.012745599448680877
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,1,0.012417600303888322
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,1,0.018671999871730804
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,1,0.012692800164222718
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,1,0.01828320026397705
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,1,0.018628799915313722
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,1,0.01828480064868927
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,1,0.019131200015544893
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,1,0.018411199748516082
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,1,0.018276800215244294
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,3,0.013502399623394012
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,3,0.012800000607967377
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,3,0.012564800679683685
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,3,0.012641599774360657
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,3,0.012700800597667695
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,3,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,3,0.012329600006341934
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,3,0.019145600497722626
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,3,0.01870719939470291
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,3,0.018563200533390046
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,3,0.01839040070772171
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,3,0.018331199884414673
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,3,0.018587200343608855
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,3,0.018272000551223754
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,7,0.013609600067138673
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,7,0.012811200320720672
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,7,0.012771199643611907
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,7,0.012555199861526489
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,7,0.012614400684833526
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,7,0.012617599964141846
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,7,0.012444800138473511
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,7,0.018646399676799773
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,7,0.01931840032339096
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,7,0.018593600392341612
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,7,0.01842560023069382
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,7,0.018167999386787415
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,7,0.018403199315071107
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,7,0.01815200001001358
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,15,0.013255999982357025
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,15,0.013532799482345582
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,15,0.01276639997959137
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,15,0.012676799297332763
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,15,0.012561599910259246
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,15,0.012716799974441528
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,15,0.01260959953069687
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,15,0.019420799612998963
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,15,0.01863359957933426
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,15,0.018540799617767334
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,15,0.01855839937925339
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,15,0.018246400356292724
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,15,0.018475200235843658
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,15,0.01847040057182312
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,31,0.013612799346446991
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,31,0.013222399353981017
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,31,0.012705600261688233
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,31,0.012481600046157837
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,31,0.01284639984369278
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,31,0.01265919953584671
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,31,0.012721599638462066
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,31,0.01932000070810318
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,31,0.018782399594783783
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,31,0.01878879964351654
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,31,0.01868640035390854
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,31,0.018515199422836304
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,31,0.018475200235843658
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,31,0.01868479996919632
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,63,0.013518400490283966
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,63,0.013736000657081604
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,63,0.01356000006198883
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,63,0.012948800623416901
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,63,0.013031999766826629
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,63,0.01252640038728714
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,63,0.01279519945383072
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,63,0.01972000002861023
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,63,0.019513599574565887
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,63,0.019755199551582336
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,63,0.01903360038995743
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,63,0.01860959976911545
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,63,0.01886879950761795
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,63,0.018918399512767792
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,131071,2.2130144119262694
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,127,0.015403200685977936
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,127,0.01496800035238266
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,127,0.014531199634075165
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,127,0.014723199605941772
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,127,0.014412799477577209
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,127,0.014937600493431092
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,127,0.02292959988117218
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,127,0.022862400114536285
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,127,0.022316800057888032
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,127,0.023822399973869323
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,127,0.02244960069656372
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,127,0.02242880016565323
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,127,0.0142752006649971
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,127,0.022495999932289124
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,255,0.01851679980754852
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,255,0.01794400066137314
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,255,0.01770720034837723
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,255,0.017481599748134614
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,255,0.017518399655818938
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,255,0.017652800679206847
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,255,0.01752000004053116
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,255,0.029488000273704528
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,255,0.028415998816490172
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,255,0.028571200370788575
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,255,0.028259199857711793
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,255,0.02847039997577667
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,255,0.02836799919605255
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,255,0.028332799673080444
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,511,0.023384000360965728
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,511,0.0201664000749588
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,511,0.018313600122928618
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,511,0.019167999923229217
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,511,0.019675199687480927
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,511,0.019551999866962433
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,511,0.019603200256824493
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,511,0.04089120030403137
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,511,0.0378383994102478
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,511,0.035780799388885495
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,511,0.03586559891700745
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,511,0.036692801117897036
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,511,0.036353600025177
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,511,0.03665919899940491
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,1023,0.02096160054206848
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,1023,0.022111999988555908
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,1023,0.026652801036834716
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,1023,0.021036800742149354
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,1023,0.021279999613761903
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,1023,0.02083040028810501
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,1023,0.02038400024175644
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,1023,0.05576320290565491
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,1023,0.053406399488449094
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,1023,0.062350398302078246
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,1023,0.05156319737434387
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,1023,0.05141599774360657
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,1023,0.05117440223693848
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,1023,0.05131040215492248
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,2047,0.03377760052680969
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,2047,0.027595201134681703
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,2047,0.02529920041561127
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,2047,0.0380160003900528
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,2047,0.025655999779701233
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,2047,0.026097598671913146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,2047,0.02531839907169342
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,2047,0.0940720021724701
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,2047,0.09767040014266967
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,2047,0.09261760115623474
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,2047,0.09102079868316651
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,2047,0.09223679900169372
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,2047,0.09238719940185547
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,2047,0.09248480200767517
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,4095,0.05375999808311462
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,4095,0.04721280038356781
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,4095,0.04328320026397705
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,4095,0.04491199851036072
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,4095,0.045332801342010495
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,4095,0.04596799910068512
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,4095,0.04519999921321869
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,4095,0.17057119607925414
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,4095,0.16289600133895873
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,4095,0.16559840440750123
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,4095,0.16302239894866943
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,4095,0.16538239717483522
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,4095,0.16538079977035522
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,4095,0.1658064007759094
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,8191,0.07846239805221558
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,8191,0.06899840235710145
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,8191,0.07217919826507568
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,8191,0.07217119932174683
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,8191,0.06797599792480469
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,8191,0.07292640209197998
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,8191,0.07279840111732483
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,8191,0.3099792003631592
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,8191,0.3049007892608643
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,8191,0.3028719902038574
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,8191,0.3028111934661865
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,8191,0.3079263925552368
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,8191,0.30691840648651125
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,16383,0.1202895998954773
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,16383,0.12686079740524292
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,16383,0.1175104022026062
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,16383,0.11584960222244263
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,16383,0.12042239904403687
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,8191,0.3079279899597168
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,16383,0.12120959758758545
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,16383,0.12044320106506348
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,16383,0.5833151817321778
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,16383,0.5740015983581543
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,16383,0.5792255878448487
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,16383,0.5746543884277344
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,16383,0.5795839786529541
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,16383,0.5790143966674804
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,32767,0.22124159336090088
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,32767,0.21550400257110597
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,32767,0.2123807907104492
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,32767,0.21114399433135986
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,32767,0.21566400527954102
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,16383,0.5804768085479737
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,32767,0.21578080654144288
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,32767,0.21549758911132813
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,32767,1.1240400314331054
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,32767,1.122059154510498
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,32767,1.1196352005004884
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,32767,1.1107215881347656
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,32767,1.1235072135925293
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,32767,1.1189151763916017
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,65535,0.4197472095489502
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,65535,0.40749921798706057
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,65535,0.40486559867858884
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,65535,0.40197601318359377
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,65535,0.40641121864318847
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,32767,1.1191472053527831
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,65535,0.4078239917755127
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,65535,0.4106031894683838
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,65535,2.2156272888183595
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,131071,0.7956143856048584
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,65535,2.194318389892578
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,65535,2.207896041870117
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,65535,2.1894784927368165
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,65535,2.2047119140625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,65535,2.2084415435791014
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,65535,2.7376352310180665
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,131071,0.7868912220001221
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,131071,0.7879024028778077
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,131071,0.7820015907287597
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,131071,0.7875232219696044
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,131071,0.7866864204406738
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,131071,0.7872384071350098
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,1,0.014791999757289887
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,1,0.013673600554466248
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,1,0.01361600011587143
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,1,0.01318880021572113
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,1,0.013247999548912048
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,1,0.01342879980802536
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,1,0.013220800459384919
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,1,0.020478400588035583
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,1,0.019244800508022308
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,1,0.01912959963083267
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,1,0.01889919936656952
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,1,0.01902880072593689
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,1,0.0189968004822731
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,1,0.018982400000095368
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,3,0.014977599680423736
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,3,0.01372160017490387
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,3,0.013558399677276612
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,131071,4.3681072235107425
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,3,0.01308639943599701
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,3,0.013104000687599182
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,3,0.013084800541400909
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,131071,4.364513778686524
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,131071,4.350526428222656
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,3,0.013158400356769562
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,131071,4.38260498046875
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,3,0.018803200125694274
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,131071,4.352830505371093
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,131071,4.353230285644531
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,3,0.020452800393104553
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,3,0.018628799915313722
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,3,0.019385600090026857
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,3,0.019366399943828584
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,7,0.014742399752140044
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,7,0.013624000549316406
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,7,0.013278399407863618
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,7,0.013308799266815186
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,3,0.01881600022315979
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,3,0.019097599387168884
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,7,0.01311040073633194
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,7,0.013288000226020813
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,7,0.01319040060043335
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,7,0.019531199336051942
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,7,0.019057600200176238
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,7,0.018881599605083465
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,7,0.01908479928970337
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,7,0.020665599405765532
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,7,0.019118399918079378
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,7,0.019257600605487823
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,15,0.014843200147151948
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,15,0.013889600336551667
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,15,0.01343040019273758
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,15,0.013123199343681335
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,15,0.013366399705410004
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,15,0.0135903999209404
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,15,0.01326880007982254
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,15,0.0208624005317688
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,15,0.01964319944381714
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,15,0.01897439956665039
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,15,0.019256000220775605
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,15,0.01934240013360977
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,15,0.019041599333286287
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,15,0.01896799951791763
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,31,0.015201599895954132
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,31,0.013702400028705597
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,31,0.013575999438762665
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,31,0.013264000415802002
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,31,0.013276800513267517
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,31,0.013283200562000275
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,31,0.013145600259304047
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,31,0.02122559994459152
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,31,0.020164799690246583
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,31,0.019735999405384064
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,31,0.019289599359035493
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,31,0.019684800505638124
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,31,0.019398400187492372
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,31,0.019310399889945984
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,63,0.015911999344825744
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,63,0.014022399485111237
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,63,0.013872000575065612
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,63,0.013836799561977387
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,63,0.013793599605560303
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,63,0.013646399974822998
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,63,0.013868799805641175
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,63,0.023287999629974365
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,63,0.02195200026035309
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,63,0.022092799842357635
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,63,0.02146719992160797
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,63,0.021649600565433504
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,131071,6.648308563232422
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,63,0.02192319929599762
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,127,0.017097599804401398
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,63,0.021587200462818146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,127,0.015769599378108977
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,127,0.015782399475574492
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,127,0.015494400262832641
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,127,0.01539359986782074
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,127,0.01589599996805191
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,127,0.027644801139831542
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,127,0.026020801067352294
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,127,0.026023998856544495
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,127,0.02645600140094757
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,127,0.02606239914894104
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,127,0.018246400356292724
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,127,0.02619200050830841
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,127,0.02617599964141846
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,255,0.020126399397850037
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,255,0.01908160001039505
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,255,0.018603199720382692
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,255,0.01867839992046356
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,255,0.021219199895858763
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,255,0.01881760060787201
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,255,0.018692800402641298
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,255,0.03623839914798736
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,255,0.0353632003068924
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,255,0.03489919900894165
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,255,0.03478400111198425
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,255,0.03502239882946014
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,255,0.03501920104026794
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,255,0.03731679916381836
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,511,0.02911840081214905
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,511,0.02104160040616989
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,511,0.02356799989938736
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,511,0.0203792005777359
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,511,0.02172800004482269
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,511,0.021836799383163453
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,511,0.02123039960861206
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,511,0.06488159894943238
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,511,0.05321120023727417
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,511,0.05178400278091431
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,511,0.051841598749160764
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,511,0.056923198699951175
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,511,0.05141119956970215
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,511,0.051052802801132204
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,1023,0.04123519957065582
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,1023,0.024161599576473236
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,1023,0.024875199794769286
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,1023,0.03411679863929749
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,1023,0.025140801072120668
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,1023,0.02831839919090271
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,1023,0.024560000002384185
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,1023,0.10036959648132324
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,1023,0.09159359931945801
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,1023,0.09099040031433106
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,1023,0.09415199756622314
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,1023,0.09126719832420349
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,1023,0.09239519834518432
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,1023,0.09043999910354614
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,2047,0.056113600730895996
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,2047,0.04397439956665039
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,2047,0.04853599965572357
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,2047,0.045638400316238406
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,2047,0.0443231999874115
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,2047,0.04400480091571808
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,2047,0.04418880045413971
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,2047,0.17248480319976806
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,2047,0.16411999464035035
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,2047,0.16262400150299072
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,2047,0.16237280368804932
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,2047,0.1664687991142273
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,2047,0.1630079984664917
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,2047,0.16304960250854492
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,4095,0.07264320254325866
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,4095,0.08107680082321167
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,4095,0.06955680251121521
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,4095,0.06811040043830871
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,4095,0.0699567973613739
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,4095,0.07019839882850647
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,4095,0.0694383978843689
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,4095,0.3047712087631226
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,4095,0.31212639808654785
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,4095,0.3044991970062256
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,4095,0.30091679096221924
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,4095,0.3035072088241577
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,4095,0.30296640396118163
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,8191,0.1212048053741455
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,8191,0.1158336043357849
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,8191,0.12969119548797609
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,8191,0.11806080341339112
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,8191,0.14282239675521852
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,4095,0.30597920417785646
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,8191,0.1214303970336914
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,8191,0.12093919515609741
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,8191,0.5755119800567627
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,8191,0.583787202835083
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,8191,0.5759391784667969
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,8191,0.5762639999389648
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,8191,0.5773871898651123
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,8191,0.9422816276550293
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,8191,0.5811279773712158
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,16383,0.22511041164398193
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,16383,0.21374719142913817
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,16383,0.21732640266418457
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,16383,0.2114880084991455
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,16383,0.21507840156555175
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,16383,0.22219679355621338
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,16383,0.2160128116607666
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,16383,1.1186495780944825
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,16383,1.1211423873901367
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,16383,1.122214412689209
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,16383,1.1112784385681151
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,16383,1.115559959411621
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,16383,1.1242079734802246
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,16383,1.3426832199096679
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,32767,0.4163008213043213
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,32767,0.4087952136993408
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,32767,0.40464320182800295
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,32767,0.4030623912811279
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,32767,0.4061840057373047
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,32767,0.40681281089782717
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,32767,0.40755519866943357
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,65535,0.7986127853393554
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,32767,2.208448028564453
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,32767,2.2092767715454102
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,32767,2.1908735275268554
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,32767,2.1942367553710938
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,32767,2.1942207336425783
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,32767,2.911529541015625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,32767,2.202934455871582
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,65535,0.7880320072174072
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,65535,0.7860064029693603
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,65535,0.7837071895599366
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,65535,0.7861743927001953
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,65535,0.7864768028259277
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,65535,0.788105583190918
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,65535,4.38166389465332
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,65535,4.382684707641602
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,1,0.015939199924468996
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,1,0.014507199823856353
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,1,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,1,0.014097599685192109
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,1,0.01395999938249588
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,1,0.01403840035200119
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,1,0.014180800318717957
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,1,0.021911999583244322
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,1,0.020084799826145174
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,65535,4.335559844970703
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,1,0.019582399725914003
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,1,0.020075200498104094
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,65535,4.345161437988281
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,65535,4.390732955932617
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,1,0.02003680020570755
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,1,0.019654400646686554
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,1,0.019787199795246124
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,65535,4.383359909057617
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,65535,4.384467315673828
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,3,0.014281600713729858
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,3,0.014302399754524232
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,3,0.013967999815940857
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,3,0.015876799821853638
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,3,0.014800000190734863
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,3,0.020294399559497835
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,3,0.021393600106239318
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,3,0.020103999972343446
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,3,0.014097599685192109
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,3,0.014241600036621093
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,3,0.019383999705314636
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,3,0.01988479942083359
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,3,0.019780799746513367
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,7,0.015747199952602386
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,3,0.01972319930791855
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,7,0.014127999544143677
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,7,0.013942399621009826
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,7,0.014047999680042268
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,7,0.014156800508499146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,7,0.021934400498867034
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,7,0.014542399346828461
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,7,0.014054399728775025
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,7,0.020177599787712098
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,7,0.01993120014667511
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,7,0.019897599518299103
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,7,0.019976000487804412
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,15,0.015777599811553956
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,7,0.019739200174808503
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,7,0.01998399943113327
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,15,0.014580799639225006
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,15,0.014094400405883788
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,15,0.013945600390434265
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,15,0.013990400731563568
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,15,0.01438080072402954
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,15,0.014505599439144135
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,15,0.022260800004005432
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,15,0.020937600731849672
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,15,0.019894400238990785
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,15,0.020715199410915375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,15,0.02016319930553436
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,15,0.02046400010585785
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,15,0.020545600354671477
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,31,0.01563200056552887
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,31,0.014731200039386749
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,31,0.014310400187969207
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,31,0.014315199851989747
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,31,0.013902400434017182
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,31,0.014073599874973298
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,31,0.014558400213718414
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,31,0.023004800081253052
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,31,0.0225600004196167
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,31,0.024022400379180908
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,31,0.02215999960899353
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,31,0.022121599316596983
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,31,0.02232639938592911
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,31,0.02253919988870621
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,63,0.016700799763202667
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,63,0.014932799339294433
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,63,0.01451520025730133
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,63,0.014774399995803832
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,63,0.014735999703407287
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,63,0.014448000490665436
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,63,0.014793600142002105
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,63,0.02709920108318329
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,63,0.025969600677490233
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,63,0.02547520101070404
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,63,0.025731199979782106
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,63,0.025337600708007814
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,63,0.02526719868183136
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,63,0.025755199790000915
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,127,0.019628800451755524
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,127,0.017289599776268004
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,127,0.016505600512027742
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,127,0.01645440012216568
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,127,0.01621599942445755
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,127,0.016708800196647645
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,127,0.016791999340057373
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,127,0.03535040020942688
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,127,0.03324800133705139
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,127,0.03306399881839752
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,127,0.03297280073165894
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,127,0.03293280005455017
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,127,0.03264159858226776
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,255,0.021963199973106383
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,127,0.0329263985157013
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,255,0.020431999862194062
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,255,0.019351999461650848
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,255,0.019662399590015412
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,255,0.019659200310707094
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,255,0.019499200582504272
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,255,0.019859200716018675
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,255,0.054657602310180665
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,255,0.05099679827690125
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,255,0.050183999538421634
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,255,0.04917919933795929
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,255,0.049686399102210996
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,255,0.04824959933757782
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,255,0.049030399322509764
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,511,0.045270401239395144
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,511,0.036111998558044436
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,511,0.03235200047492981
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,511,0.025407999753952026
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,511,0.025603199005126955
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,511,0.02693440020084381
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,511,0.02712160050868988
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,511,0.10440319776535034
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,511,0.09595519900321961
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,511,0.09169279932975768
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,511,0.0932096004486084
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,511,0.09275040030479431
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,511,0.09324640035629272
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,511,0.09300000071525574
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,1023,0.05985599756240845
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,1023,0.05082560181617737
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,1023,0.04562880098819733
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,1023,0.04527519941329956
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,1023,0.04595040082931519
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,1023,0.04573279917240143
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,1023,0.04709919989109039
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,1023,0.17587039470672608
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,1023,0.16827839612960815
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,1023,0.16493120193481445
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,1023,0.1647264003753662
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,1023,0.16354880332946778
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,1023,0.16536959409713745
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,1023,0.16652640104293823
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,2047,0.08471360206604003
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,2047,0.07594879865646362
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,2047,0.07092480063438415
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,2047,0.06889920234680176
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,2047,0.06887999773025513
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,2047,0.07027999758720398
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,2047,0.0706496000289917
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,2047,0.3146271944046021
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,2047,0.3077359914779663
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,2047,0.3029247999191284
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,2047,0.3035423994064331
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,2047,0.30404000282287597
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,2047,0.3046720027923584
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,2047,0.30532801151275635
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,4095,0.13410240411758423
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,4095,0.11888959407806396
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,4095,0.1167248010635376
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,4095,0.12390880584716797
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,4095,0.11953120231628418
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,4095,0.12093119621276856
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,4095,0.12131199836730958
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,4095,0.5797296047210694
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,4095,0.5749567985534668
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,4095,0.5871727943420411
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,4095,0.5771743774414062
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,4095,0.5738480091094971
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,4095,0.5789599895477295
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,4095,0.5874703884124756
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,8191,0.2306879997253418
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,8191,0.21528639793395996
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,8191,0.2109935998916626
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,8191,0.2172991991043091
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,8191,0.2199023962020874
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,8191,0.2220304012298584
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,8191,0.2267983913421631
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,8191,1.13569917678833
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,8191,1.1222816467285157
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,8191,1.1256416320800782
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,8191,1.1146400451660157
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,8191,1.125603199005127
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,8191,1.1203840255737305
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,8191,1.247428798675537
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,16383,0.4220272064208984
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,16383,0.41209120750427247
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,16383,0.4031184196472168
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,16383,0.405836820602417
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,16383,0.4077871799468994
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,16383,0.41263680458068847
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,16383,0.4119728088378906
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,16383,2.569108772277832
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,16383,2.2046064376831054
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,16383,2.2089839935302735
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,16383,2.2054880142211912
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,16383,2.2102783203125
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,16383,2.213257598876953
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,16383,2.2146623611450194
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,32767,0.791926383972168
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,32767,0.7835552215576171
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,32767,0.7857888221740723
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,32767,1.10348482131958
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,32767,0.7884575843811035
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,32767,0.7930831909179688
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,32767,0.792310380935669
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,1,0.017446400225162507
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,1,0.015638400614261628
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,1,0.015806399285793304
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,1,0.015371200442314149
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,1,0.01576640009880066
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,1,0.015328000485897064
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,1,0.015863999724388123
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,1,0.023830400407314302
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,1,0.02202560007572174
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,1,0.021967999637126923
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,1,0.021942399442195892
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,1,0.021777600049972534
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,1,0.02173440009355545
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,1,0.021768000721931458
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,3,0.017497600615024568
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,3,0.0158160001039505
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,3,0.01584160029888153
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,32767,4.349728012084961
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,3,0.015263999998569488
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,32767,4.356289672851562
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,32767,4.336528015136719
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,32767,4.37457275390625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,3,0.015175999701023101
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,3,0.015526400506496429
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,32767,4.388060760498047
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,32767,4.390996932983398
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,3,0.021499200165271758
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,3,0.02154559940099716
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,3,0.015675200521945952
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,3,0.02396959960460663
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,3,0.021294400095939636
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,7,0.017745600640773775
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,3,0.02181279957294464
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,7,0.01571040004491806
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,3,0.021353599429130555
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,7,0.015460799634456634
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,3,0.021934400498867034
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,7,0.015715199708938598
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,7,0.015880000591278077
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,7,0.015220800042152404
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,7,0.023668800294399262
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,7,0.015440000593662262
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,7,0.021910400688648225
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,7,0.022155199944972993
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,7,0.021612800657749176
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,7,0.021958400309085847
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,7,0.022230400145053862
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,7,0.02160319983959198
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,15,0.01578560024499893
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,15,0.0174575999379158
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,15,0.016011199355125426
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,15,0.015852800011634825
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,15,0.015919999778270723
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,15,0.015568000078201295
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,15,0.015702399611473083
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,32767,5.860800170898438
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,15,0.025824001431465148
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,15,0.02428320050239563
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,15,0.024296000599861145
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,15,0.023974399268627166
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,15,0.024241599440574645
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,15,0.023772799968719484
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,15,0.02391359955072403
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,31,0.017851200699806214
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,31,0.01584800034761429
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,31,0.015806399285793304
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,31,0.015972800552845
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,31,0.01571680009365082
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,31,0.016123199462890626
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,31,0.02894560098648071
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,31,0.027083200216293336
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,31,0.020364800095558168
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,31,0.026688000559806822
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,31,0.026787200570106508
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,31,0.02672159969806671
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,31,0.02645919919013977
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,31,0.026688000559806822
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,63,0.018716800212860107
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,63,0.016948799788951873
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,63,0.016601599752902985
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,63,0.016276800632476808
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,63,0.016604800522327424
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,63,0.016728000342845918
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,63,0.020598399639129638
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,63,0.03787040114402771
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,63,0.03375200033187866
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,63,0.033371201157569884
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,63,0.033020800352096556
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,63,0.03261919915676117
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,63,0.033057600259780884
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,127,0.022176000475883483
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,63,0.037049600481987
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,127,0.019377599656581878
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,127,0.018638400733470915
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,127,0.019044800102710722
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,127,0.018401600420475006
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,127,0.018716800212860107
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,127,0.018287999927997588
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,127,0.06142079830169678
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,127,0.05168160200119019
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,127,0.05011039972305298
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,127,0.04796479940414429
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,127,0.0486272007226944
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,127,0.04996800124645233
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,127,0.04738560020923614
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,255,0.035876798629760745
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,255,0.031043198704719544
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,255,0.023559999465942384
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,255,0.025758400559425354
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,255,0.02335679978132248
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,255,0.022945599257946016
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,255,0.023068800568580627
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,255,0.09556480050086975
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,255,0.0883247971534729
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,255,0.09038400053977966
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,255,0.08786240220069885
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,255,0.08776320219039917
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,255,0.08758720159530639
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,255,0.08714879751205444
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,511,0.04710400104522705
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,511,0.05303360223770141
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,511,0.04541119933128357
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,511,0.043592000007629396
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,511,0.043907201290130614
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,511,0.043699198961257936
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,511,0.043787199258804324
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,511,0.1711408019065857
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,511,0.17034080028533935
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,511,0.16435680389404297
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,511,0.1626464009284973
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,511,0.1627887964248657
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,511,0.16300640106201172
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,511,0.1621135950088501
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,1023,0.07667840123176575
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,1023,0.07218239903450012
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,1023,0.06775040030479432
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,1023,0.06707839965820313
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,1023,0.06993920207023621
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,1023,0.06789439916610718
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,1023,0.06819360256195069
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,1023,0.30888800621032714
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,1023,0.3051584005355835
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,1023,0.3020064115524292
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,1023,0.30190720558166506
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,1023,0.3024336099624634
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,1023,0.3006720066070557
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,1023,0.30100159645080565
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,2047,0.12481759786605835
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,2047,0.14772000312805175
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,2047,0.11733119487762451
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,2047,0.11559679508209228
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,2047,0.11556799411773681
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,2047,0.11515200138092041
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,2047,0.11500639915466308
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,2047,0.5806511878967285
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,2047,0.5738192081451416
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,2047,0.572327995300293
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,2047,0.5743087768554688
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,2047,0.5712624073028565
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,2047,0.5729680061340332
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,2047,0.805787181854248
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,4095,0.2198335886001587
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,4095,0.21506879329681397
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,4095,0.21230878829956054
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,4095,0.2106015920639038
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,4095,0.2100383996963501
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,4095,0.21116321086883544
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,4095,0.2172703981399536
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,4095,1.1255904197692872
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,4095,1.1109984397888184
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,4095,1.1176480293273925
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,4095,1.1156656265258789
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,4095,1.1129743576049804
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,4095,1.1104559898376465
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,4095,1.3449104309082032
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,8191,0.42409601211547854
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,8191,0.4005440235137939
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,8191,0.4063263893127441
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,8191,0.40370879173278806
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,8191,0.4024911880493164
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,8191,0.4008927822113037
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,8191,0.41337761878967283
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,16383,0.7932991981506348
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,8191,2.2144256591796876
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,8191,2.19366569519043
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,8191,2.190083122253418
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,8191,2.18875675201416
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,8191,2.2043840408325197
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,8191,2.188960075378418
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,8191,2.8000463485717773
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,16383,0.7924911975860596
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,16383,0.7811583995819091
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,16383,0.7849088191986084
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,16383,0.7807775974273682
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,16383,0.7800784111022949
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,16383,0.78089280128479
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,1,0.02808000147342682
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,1,0.023737600445747374
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,1,0.022878399491310118
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,1,0.02202560007572174
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,1,0.02263360023498535
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,1,0.02231519967317581
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,1,0.02212799936532974
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,16383,4.351313781738281
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,1,0.035519999265670774
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,16383,4.3334800720214846
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,16383,4.3791648864746096
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,16383,4.331137466430664
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,16383,4.377422332763672
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,16383,4.347278213500976
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,1,0.029190400242805482
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,1,0.02940160036087036
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,1,0.027852800488471986
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,16383,5.543099212646484
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,3,0.02253919988870621
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,1,0.028360000252723692
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,3,0.023127999901771546
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,3,0.022235199809074402
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,1,0.028409600257873535
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,3,0.02672959864139557
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,3,0.022099199891090392
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,1,0.028540799021720888
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,3,0.02194720059633255
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,3,0.029467201232910155
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,3,0.03207040131092072
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,3,0.03636319935321808
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,3,0.02914080023765564
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,3,0.028777599334716797
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,3,0.02868640124797821
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,3,0.02890079915523529
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,3,0.028638398647308348
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,7,0.027823999524116516
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,7,0.022683200240135194
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,7,0.022092799842357635
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,7,0.022409600019454957
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,7,0.028174400329589844
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,7,0.02247679978609085
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,7,0.02210240066051483
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,7,0.03926079869270325
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,7,0.03172479867935181
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,7,0.030934399366378783
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,7,0.030267199873924254
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,7,0.030748799443244934
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,7,0.03060320019721985
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,7,0.037278398871421814
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,15,0.02311680018901825
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,15,0.028774398565292358
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,15,0.02265920042991638
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,15,0.02260800004005432
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,15,0.02231999933719635
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,15,0.022038400173187256
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,15,0.02231519967317581
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,15,0.034283199906349184
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,15,0.05003039836883545
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,15,0.033687999844551085
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,15,0.033432000875473024
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,15,0.03341760039329529
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,15,0.03315680027008057
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,15,0.033251199126243594
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,31,0.03026399910449982
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,31,0.023188799619674683
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,31,0.027857598662376405
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,31,0.022628800570964815
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,31,0.022470399737358093
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,31,0.02258239984512329
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,31,0.022203199565410614
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,31,0.0418832004070282
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,31,0.04354560077190399
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,31,0.040119999647140504
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,31,0.05199199914932251
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,63,0.03722400069236755
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,31,0.0393312007188797
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,31,0.03896960020065308
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,31,0.03882560133934021
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,63,0.025729599595069885
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,63,0.026500800251960756
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,63,0.023384000360965728
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,63,0.023577600717544556
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,63,0.023472000658512116
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,63,0.023206399381160737
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,63,0.06034719944000244
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,63,0.06196640133857727
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,63,0.05388000011444092
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,63,0.05370240211486817
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,63,0.05764639973640442
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,63,0.053345602750778195
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,127,0.043665599822998044
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,63,0.06924480199813843
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,127,0.04002400040626526
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,127,0.03532159924507141
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,127,0.03258239924907684
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,127,0.029604798555374144
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,127,0.029679998755455017
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,127,0.028412801027297974
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,127,0.10412960052490235
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,127,0.09515039920806885
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,127,0.09741439819335937
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,127,0.10663679838180543
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,127,0.09432799816131592
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,127,0.09489279985427856
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,127,0.09465919733047486
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,255,0.0591808021068573
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,255,0.051755201816558835
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,255,0.04836480021476745
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,255,0.049486398696899414
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,255,0.04726240038871765
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,255,0.046649599075317384
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,255,0.04574080109596253
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,255,0.17075200080871583
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,255,0.18050719499588014
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,255,0.16857119798660278
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,255,0.16767200231552123
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,255,0.16898880004882813
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,255,0.16639519929885865
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,255,0.1676959991455078
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,511,0.09187840223312378
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,511,0.08370879888534546
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,511,0.07773759961128235
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,511,0.07581760287284851
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,511,0.07492160201072692
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,511,0.07446399927139283
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,511,0.07476800084114074
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,511,0.3317823886871338
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,511,0.31251840591430663
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,511,0.32319519519805906
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,511,0.31178879737854004
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,511,0.31113440990448
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,511,0.31082561016082766
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,511,0.3098720073699951
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,1023,0.14262080192565918
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,1023,0.1296496033668518
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,1023,0.12167199850082397
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,1023,0.12283040285110473
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,1023,0.12522879838943482
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,1023,0.1216048002243042
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,1023,0.12130880355834961
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,1023,0.5857920169830322
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,1023,0.5840784072875976
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,1023,0.5812560081481933
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,1023,0.5789696216583252
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,1023,0.5819888114929199
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,1023,0.5784783840179444
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,1023,0.8376095771789551
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,2047,0.2242959976196289
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,2047,0.23381600379943848
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,2047,0.21667520999908446
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,2047,0.2202080011367798
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,2047,0.21575520038604737
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,2047,0.21604640483856202
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,2047,0.22496800422668456
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,2047,1.1384688377380372
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,2047,1.1244223594665528
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,2047,1.1194160461425782
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,2047,1.1162096023559571
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,2047,1.1208383560180664
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,2047,1.1172719955444337
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,2047,1.3994383811950684
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,4095,0.42979998588562013
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,4095,0.41635842323303224
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,4095,0.40630078315734863
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,4095,0.410097599029541
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,4095,0.4048192024230957
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,4095,0.40546398162841796
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,4095,0.4144847869873047
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,8191,1.1289423942565917
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,4095,2.226371192932129
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,4095,2.2056079864501954
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,4095,2.19674072265625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,4095,2.1937183380126952
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,4095,2.2024688720703125
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,4095,2.194060707092285
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,4095,2.201095962524414
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,8191,0.9334927558898926
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,8191,0.7856624126434326
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,8191,0.781822395324707
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,8191,0.7840384006500244
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,8191,0.7840608119964599
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,8191,0.7880911827087402
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,1,0.0545199990272522
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,1,0.04191679954528808
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,1,0.0380160003900528
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,1,0.037217599153518674
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,1,0.037011200189590455
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,8191,4.3733055114746096
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,8191,4.367982482910156
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,1,0.0364080011844635
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,1,0.03617120087146759
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,1,0.06304159760475159
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,1,0.050551998615264895
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,1,0.04428159892559051
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,1,0.04317759871482849
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,1,0.04310719966888428
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,1,0.04277600049972534
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,1,0.04283199906349182
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,8191,4.377254486083984
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,3,0.05377280116081238
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,3,0.04092960059642792
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,8191,4.333788681030273
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,8191,4.379304122924805
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,3,0.037252798676490784
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,8191,4.37796630859375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,3,0.0365664005279541
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,3,0.03792479932308197
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,3,0.03649759888648987
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,3,0.06425920128822327
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,3,0.04604159891605377
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,3,0.03667359948158264
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,3,0.05332159996032715
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,3,0.04542239904403687
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,3,0.04479039907455444
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,3,0.04469760060310364
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,3,0.04517279863357544
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,7,0.0543936014175415
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,7,0.03673279881477356
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,7,0.03765760064125061
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,7,0.03660320043563843
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,7,0.04203839898109436
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,7,0.03684960007667541
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,7,0.03659679889678955
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,7,0.06695839762687683
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,7,0.05657280087471008
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,7,0.04816479980945587
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,7,0.04795520007610321
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,7,0.0499424010515213
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,7,0.04787200093269348
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,15,0.0548687994480133
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,7,0.04709919989109039
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,8191,5.824143981933593
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,15,0.03789600133895874
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,15,0.03708159923553467
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,15,0.04219039976596832
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,15,0.037049600481987
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,15,0.037062400579452516
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,15,0.036487999558448794
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,15,0.05385919809341431
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,15,0.053615999221801755
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,15,0.05656800270080566
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,15,0.053014397621154785
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,15,0.06440160274505616
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,15,0.0539247989654541
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,31,0.045310398936271666
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,31,0.05628799796104431
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,15,0.09810720086097717
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,31,0.03836480081081391
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,31,0.036976000666618346
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,31,0.03670240044593811
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,31,0.0368367999792099
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,31,0.19188480377197265
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,31,0.09092959761619568
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,31,0.07062559723854064
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,31,0.06774399876594543
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,31,0.0674239993095398
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,31,0.07447680234909057
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,31,0.06755679845809937
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,31,0.10077760219573975
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,63,0.06263359785079955
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,63,0.05130400061607361
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,63,0.04463520050048828
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,63,0.046988800168037415
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,63,0.042952001094818115
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,63,0.05240960121154785
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,63,0.04018239974975586
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,63,0.11627520322799682
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,63,0.11116479635238648
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,63,0.10727839469909668
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,63,0.1262768030166626
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,63,0.12090079784393311
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,63,0.106278395652771
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,63,0.10447039604187011
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,127,0.07492799758911133
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,127,0.06490560173988343
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,127,0.059640002250671384
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,127,0.061185598373413086
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,127,0.056857597827911374
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,127,0.054416000843048096
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,127,0.05484319925308227
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,127,0.18495999574661254
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,127,0.19507039785385133
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,127,0.17935839891433716
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,127,0.19595999717712403
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,127,0.1762336015701294
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,127,0.17482080459594726
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,127,0.1750671982765198
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,255,0.100547194480896
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,255,0.08989440202713013
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,255,0.08463360071182251
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,255,0.08786720037460327
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,255,0.08184000253677368
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,255,0.08100799918174743
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,255,0.08089280128479004
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,255,0.3189615964889526
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,255,0.33252639770507814
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,255,0.31522719860076903
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,255,0.31261439323425294
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,255,0.34318079948425295
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,255,0.3116703987121582
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,511,0.16978399753570556
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,255,0.3119055986404419
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,511,0.15212479829788209
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,511,0.14320160150527955
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,511,0.14875999689102173
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,511,0.13656480312347413
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,511,0.13459999561309816
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,511,0.13454240560531616
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,511,0.6088272094726562
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,511,0.6267312049865723
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,511,0.593886423110962
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,511,0.5909311771392822
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,511,0.5945663928985596
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,511,0.6920896053314209
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,1023,0.23594880104064941
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,1023,0.23120479583740233
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,1023,0.262007999420166
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,1023,0.24517440795898438
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,1023,0.24530880451202391
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,511,0.5930335998535157
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,1023,0.22692639827728273
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,1023,0.22711679935455323
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,1023,1.148921585083008
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,1023,1.1314399719238282
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,1023,1.1617391586303711
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,1023,1.1373023986816406
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,1023,1.1312047958374023
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,1023,1.134291172027588
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,1023,1.425598430633545
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,2047,0.41953601837158205
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,2047,0.4528192043304443
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,2047,0.43244161605834963
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,2047,0.4242095947265625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,2047,0.41693921089172364
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,2047,0.414899206161499
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,2047,0.43440799713134765
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,4095,0.8908783912658691
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,2047,2.2463232040405274
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,2047,2.2348880767822266
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,2047,2.2050880432128905
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,2047,2.226710319519043
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,2047,2.20894718170166
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,2047,2.206737518310547
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,2047,2.219286346435547
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,4095,0.8029696464538574
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,4095,0.7972064018249512
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,4095,1.0225808143615722
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,4095,0.7945536136627197
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,4095,0.7927680015563965
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,4095,0.7945680141448974
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,1,0.0935584008693695
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,1,0.0755024015903473
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,1,0.06615679860115051
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,1,0.061715197563171384
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,1,0.06129760146141052
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,1,0.06076800227165222
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,1,0.060838401317596436
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,1,0.10262399911880493
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,1,0.08570719957351684
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,1,0.07799680233001709
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,1,0.06977279782295227
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,1,0.0689520001411438
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,1,0.06855040192604064
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,1,0.06852959990501403
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,3,0.0904143989086151
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,3,0.0738048017024994
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,3,0.06441760063171387
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,3,0.06084160208702087
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,4095,4.394918441772461
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,4095,4.4088081359863285
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,3,0.06038399934768677
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,3,0.10419360399246216
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,3,0.06018880009651184
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,3,0.060278397798538205
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,3,0.08782079815864563
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,3,0.07386720180511475
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,3,0.07189440131187438
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,3,0.08253279924392701
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,3,0.07103520035743713
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,3,0.0710752010345459
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,4095,4.359537506103516
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,4095,4.387664031982422
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,7,0.07463840246200562
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,7,0.09355040192604065
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,7,0.06584799885749817
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,4095,4.381507110595703
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,4095,4.3893791198730465
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,7,0.060420799255371097
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,7,0.11492799520492554
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,7,0.06079840064048767
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,7,0.06032000184059143
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,7,0.08734080195426941
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,7,0.06012480258941651
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,7,0.0805072009563446
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,7,0.09939200282096863
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,7,0.07741280198097229
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,7,0.07766079902648926
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,7,0.07674559950828552
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,15,0.07601280212402343
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,15,0.09472320079803467
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,15,0.0679744005203247
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,15,0.06141279935836792
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,15,0.06029760241508484
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,15,0.06033759713172913
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,15,0.06025919914245605
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,15,0.13194719552993775
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,15,0.1130511999130249
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,15,0.09560319781303406
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,15,0.09828000068664551
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,15,0.10239360332489014
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,15,0.0931984007358551
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,15,0.09210559725761414
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,31,0.0986303985118866
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,31,0.07816960215568543
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,31,0.061552000045776364
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,31,0.06930080056190491
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,31,0.06457599997520447
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,31,0.060703998804092406
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,31,0.06112800240516662
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,31,0.1626528024673462
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,31,0.1434208035469055
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,31,0.12880480289459229
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,31,0.13362079858779907
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,31,0.12717440128326415
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,31,0.12676000595092773
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,31,0.1251263976097107
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,63,0.10762239694595337
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,63,0.0732096016407013
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,63,0.0720192015171051
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,63,0.08569920063018799
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,63,0.07665600180625916
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,63,0.0703328013420105
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,63,0.0701312005519867
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,4095,6.253702545166016
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,63,0.22807519435882567
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,63,0.2072432041168213
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,63,0.1908112049102783
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,63,0.19081439971923828
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,63,0.19759199619293213
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,63,0.19244320392608644
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,127,0.0981328010559082
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,127,0.11068639755249024
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,127,0.12975360155105592
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,127,0.09286400079727172
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,127,0.09101279973983764
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,127,0.09104639887809754
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,127,0.08994399905204772
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,63,0.30236799716949464
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,127,0.3426640033721924
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,127,0.36473119258880615
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,127,0.32949280738830566
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,127,0.32370240688323976
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,127,0.32144479751586913
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,127,0.32219040393829346
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,255,0.14583040475845338
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,127,0.44495677947998047
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,255,0.17999199628829957
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,255,0.15731680393218994
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,255,0.1393072009086609
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,255,0.14174239635467528
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,255,0.13946720361709594
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,255,0.13801759481430054
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,255,0.6090367794036865
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,255,0.6022016048431397
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,255,0.5921840190887451
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,255,0.5919392108917236
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,255,0.589192008972168
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,255,0.587779188156128
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,255,0.8237168312072753
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,511,0.3098000049591064
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,511,0.24668800830841064
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,511,0.24948959350585936
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,511,0.2554383993148804
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,511,0.2754607915878296
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,511,0.24578399658203126
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,511,0.27360479831695556
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,511,1.1730735778808594
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,511,1.2073311805725098
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,511,1.1448623657226562
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,511,1.1524527549743653
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,511,1.154631996154785
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,511,1.1413488388061523
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,511,1.6000415802001953
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,1023,0.46747522354125975
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,1023,0.5024543762207031
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,1023,0.4334591865539551
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,1023,0.43810558319091797
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,1023,0.44949917793273925
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,1023,0.4331215858459473
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,1023,0.42980160713195803
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,1023,2.268587112426758
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,1023,2.2436880111694335
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,1023,2.2109920501708986
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,1023,2.209436798095703
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,1023,2.2351919174194337
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,1023,2.238852882385254
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,1023,3.4789791107177734
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,2047,0.9083439826965332
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,2047,0.8491536140441894
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,2047,0.8170207977294922
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,2047,0.8080863952636719
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,2047,0.8280256271362305
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,2047,0.810694408416748
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,2047,0.893734359741211
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,1,0.013359999656677246
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,1,0.012971200048923492
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,1,0.012708799540996551
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,1,0.012403199821710587
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,1,0.0122079998254776
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,1,0.012214399874210358
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,1,0.012305600196123123
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,1,0.01921280026435852
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,1,0.01884479969739914
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,1,0.01833280026912689
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,1,0.018174399435520173
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,1,0.01815039962530136
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,1,0.01801439970731735
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,1,0.01802079975605011
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,2047,4.377377700805664
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,2047,4.367705535888672
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,2047,4.385956954956055
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,3,0.013460800051689148
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,2047,4.358278274536133
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,2047,4.414070510864258
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,2047,4.477182388305664
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,3,0.012214399874210358
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,3,0.012718400359153748
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,3,0.01886879950761795
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,3,0.012113600224256515
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,3,0.012476799637079239
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,3,0.012031999975442886
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,3,0.018059200048446654
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,3,0.019171200692653656
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,3,0.01817920058965683
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,3,0.017825600504875184
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,3,0.017828799784183502
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,3,0.017847999930381775
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,3,0.012120000272989272
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,7,0.01266079992055893
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,7,0.01289760023355484
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,7,0.012249600142240524
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,7,0.012035199999809265
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,7,0.013097600638866424
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,7,0.012299200147390365
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,7,0.012124799937009812
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,7,0.01804800033569336
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,7,0.01807200014591217
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,7,0.018822400271892546
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,7,0.01778720021247864
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,7,0.018031999468803406
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,7,0.019075199961662292
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,7,0.017921599745750427
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,15,0.012929600477218629
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,15,0.012555199861526489
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,15,0.012145599722862244
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,15,0.012276799976825714
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,15,0.012144000083208085
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,15,0.01225920021533966
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,15,0.013142399489879608
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,15,0.018624000251293182
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,15,0.018249599635601042
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,15,0.018246400356292724
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,15,0.01781439930200577
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,15,0.018011200428009033
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,15,0.017798399925231932
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,31,0.013129599392414093
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,15,0.019120000302791595
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,31,0.01279360055923462
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,31,0.012625600397586822
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,31,0.012223999947309494
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,31,0.012272000312805176
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,31,0.012137600034475327
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,31,0.011990399658679962
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,31,0.01900479942560196
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,31,0.018611200153827667
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,31,0.01842239946126938
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,31,0.01807679980993271
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,31,0.017825600504875184
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,31,0.018036800622940063
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,31,0.017800000309944154
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,63,0.013176000118255616
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,63,0.012854400277137756
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,63,0.012726399302482604
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,63,0.012275200337171555
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,63,0.012195199728012085
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,63,0.012307199835777282
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,63,0.012135999649763108
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,63,0.018993599712848662
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,63,0.018670399487018586
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,63,0.018555200099945067
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,63,0.018265600502490997
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,63,0.018027199804782866
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,63,0.018068799376487733
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,63,0.01807519942522049
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,127,0.014711999893188476
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,127,0.014791999757289887
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,127,0.014211200177669525
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,127,0.013940800726413728
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,127,0.013865600526332855
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,127,0.014004799723625182
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,2047,5.8522705078125
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,127,0.013889600336551667
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,127,0.02078399956226349
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,127,0.02046079933643341
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,127,0.020139199495315552
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,127,0.019732800126075745
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,127,0.019704000651836397
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,127,0.01977279931306839
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,255,0.017759999632835387
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,255,0.01799360066652298
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,255,0.017083199322223665
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,255,0.017343999445438386
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,255,0.017020800709724428
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,255,0.017059199512004852
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,255,0.01698880046606064
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,127,0.02946400046348572
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,255,0.024003200232982635
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,255,0.023444800078868865
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,255,0.02314240038394928
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,255,0.02298399955034256
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,255,0.022808000445365906
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,255,0.02280000001192093
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,511,0.019249600172042847
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,511,0.017615999281406402
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,255,0.031836798787117
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,511,0.016857600212097167
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,511,0.016672000288963318
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,511,0.017521600425243377
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,511,0.017985600233078002
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,511,0.01913439929485321
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,511,0.02370239943265915
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,511,0.022924800217151643
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,511,0.022603200376033784
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,511,0.024163199961185454
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,511,0.023865599930286408
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,511,0.0348800003528595
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,511,0.025515198707580566
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,1023,0.019044800102710722
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,1023,0.016599999368190767
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,1023,0.017089599370956422
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,1023,0.0181551992893219
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,1023,0.024831999838352204
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,1023,0.01796479970216751
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,1023,0.019785599410533906
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,1023,0.02526400089263916
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,1023,0.02431199997663498
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,1023,0.023793600499629974
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,1023,0.024188800156116484
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,1023,0.028758400678634645
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,1023,0.024191999435424806
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,2047,0.021491199731826782
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,1023,0.026571199297904968
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,2047,0.019705599546432494
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,2047,0.018859200179576874
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,2047,0.01767839938402176
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,2047,0.021873599290847777
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,2047,0.017899200320243835
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,2047,0.017884799838066102
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,2047,0.0300464004278183
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,2047,0.026932799816131593
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,2047,0.02815839946269989
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,2047,0.02646079957485199
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,2047,0.026009601354599
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,2047,0.02619839906692505
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,2047,0.033020800352096556
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,4095,0.023695999383926393
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,4095,0.022012799978256226
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,4095,0.020902399718761445
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,4095,0.020292800664901734
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,4095,0.019393600523471832
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,4095,0.019945600628852846
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,4095,0.025956800580024718
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,4095,0.03416639864444733
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,4095,0.03348000049591064
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,4095,0.03094240128993988
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,4095,0.03165439963340759
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,4095,0.030737599730491637
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,4095,0.031222400069236756
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,4095,0.039638400077819824
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,8191,0.026440000534057616
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,8191,0.02555679976940155
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,8191,0.02328319996595383
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,8191,0.021878400444984437
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,8191,0.02348800003528595
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,8191,0.022150400280952453
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,8191,0.027156800031661987
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,8191,0.04469920098781586
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,8191,0.04287999868392944
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,8191,0.040545600652694705
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,8191,0.039868798851966855
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,8191,0.039001598954200745
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,8191,0.04131680130958557
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,8191,0.04577920138835907
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,16383,0.03192479908466339
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,16383,0.027188798785209654
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,16383,0.02573919892311096
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,16383,0.025782400369644166
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,16383,0.02815839946269989
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,16383,0.026419198513031004
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,16383,0.032332798838615416
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,16383,0.06499680280685424
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,16383,0.060164797306060794
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,16383,0.05565919876098633
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,16383,0.06098880171775818
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,16383,0.05923839807510376
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,16383,0.0681984007358551
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,16383,0.05578399896621704
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,32767,0.04365760087966919
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,32767,0.04195519983768463
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,32767,0.040320000052452086
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,32767,0.03366079926490784
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,32767,0.031214401125907898
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,32767,0.031147199869155883
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,32767,0.0302592009305954
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,32767,0.10526880025863647
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,32767,0.09939839839935302
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,32767,0.09846879839897156
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,32767,0.10715199708938598
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,32767,0.09778879880905152
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,32767,0.09672319889068604
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,32767,0.09542080163955688
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,65535,0.059864002466201785
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,65535,0.056732797622680665
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,65535,0.05212479829788208
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,65535,0.05506560206413269
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,65535,0.050123202800750735
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,65535,0.0493151992559433
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,65535,0.04613440036773682
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,65535,0.1795456051826477
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,65535,0.1741039991378784
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,65535,0.169814395904541
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,65535,0.17112959623336793
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,65535,0.18032959699630738
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,65535,0.17108800411224365
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,65535,0.1675055980682373
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,131071,0.08900160193443299
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,131071,0.08712319731712341
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,131071,0.08208320140838624
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,131071,0.07576479911804199
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,131071,0.08391519784927368
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,131071,0.07509120106697083
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,131071,0.07330080270767211
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,131071,0.3206624031066895
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,131071,0.31825120449066163
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,131071,0.3131119966506958
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,131071,0.31353440284729006
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,131071,0.30912959575653076
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,131071,0.37834720611572265
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,1,0.013478399813175201
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,1,0.012904000282287598
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,1,0.012243200093507767
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,1,0.012585599720478059
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,1,0.012214399874210358
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,1,0.012139199674129486
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,1,0.017416000366210938
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,1,0.018932799994945525
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,131071,0.30934240818023684
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,1,0.018408000469207764
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,1,0.01812160015106201
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,1,0.01780640035867691
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,1,0.025201600790023804
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,1,0.01764799952507019
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,3,0.013500800728797913
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,1,0.01765599995851517
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,3,0.01297760009765625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,3,0.01803999990224838
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,3,0.012219200283288956
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,3,0.01220960021018982
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,3,0.012276799976825714
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,3,0.012140800058841706
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,3,0.018603199720382692
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,3,0.017980800569057466
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,3,0.0177824005484581
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,3,0.01764799952507019
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,3,0.017665599286556245
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,3,0.017880000174045563
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,3,0.027137601375579835
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,7,0.013463999330997466
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,7,0.012985600531101227
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,7,0.012227199971675873
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,7,0.012300799787044524
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,7,0.012278400361537933
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,7,0.01655679941177368
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,7,0.01215839982032776
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,7,0.019038400053977965
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,7,0.01822720021009445
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,7,0.01797119975090027
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,7,0.01767520010471344
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,7,0.017892800271511078
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,7,0.017582400143146514
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,7,0.017664000391960144
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,15,0.018433600664138794
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,15,0.01295360028743744
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,15,0.012641599774360657
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,15,0.01682240068912506
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,15,0.012238399684429168
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,15,0.012067200243473053
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,15,0.012198399752378464
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,15,0.01889919936656952
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,15,0.025235199928283693
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,15,0.017982399463653563
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,15,0.017769600450992584
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,15,0.01767839938402176
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,15,0.01805119961500168
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,15,0.017763200402259826
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,31,0.018299199640750885
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,31,0.012736000120639801
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,31,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,31,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,31,0.01228479966521263
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,31,0.014259199798107147
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,31,0.012060800194740295
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,31,0.018969599902629853
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,31,0.018641600012779237
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,31,0.018118399381637573
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,31,0.020526400208473204
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,31,0.017535999417304993
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,31,0.017720000445842744
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,31,0.017947199940681457
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,63,0.013308799266815186
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,63,0.015083199739456177
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,63,0.01242400035262108
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,63,0.012520000338554382
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,63,0.012263999879360199
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,63,0.012345600128173827
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,63,0.01408800035715103
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,63,0.01897439956665039
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,63,0.018539200723171233
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,63,0.018272000551223754
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,63,0.01794559955596924
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,63,0.020451200008392335
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,63,0.017742399871349335
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,63,0.01778720021247864
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,127,0.0150751993060112
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,127,0.01480800062417984
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,127,0.01664319932460785
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,127,0.013915200531482697
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,127,0.014006400108337402
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,127,0.013966399431228637
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,127,0.013926400244235993
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,127,0.023790399730205535
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,127,0.020033599436283113
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,127,0.01982239931821823
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,127,0.01956000030040741
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,127,0.0192671999335289
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,127,0.02096800059080124
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,127,0.019606399536132812
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,255,0.01818400025367737
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,255,0.017766399681568144
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,255,0.017164799571037292
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,255,0.01860480010509491
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,255,0.016843199729919434
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,255,0.016967999935150146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,255,0.017132799327373504
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,255,0.023515200614929198
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,255,0.025387200713157653
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,255,0.022767999768257143
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,255,0.02282720059156418
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,255,0.022782400250434875
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,255,0.022575999796390533
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,255,0.024540799856185912
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,511,0.01932159960269928
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,511,0.01891999989748001
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,511,0.01690399944782257
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,511,0.01791999936103821
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,511,0.018119999766349794
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,511,0.017617599666118623
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,511,0.0180976003408432
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,511,0.025887998938560485
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,511,0.025243198871612547
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,511,0.025121599435806274
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,511,0.023095999658107758
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,511,0.023112000524997713
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,511,0.0239439994096756
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,511,0.02423679977655411
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,1023,0.019364799559116363
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,1023,0.018161599338054658
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,1023,0.017476800084114074
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,1023,0.01721920073032379
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,1023,0.017927999794483184
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,1023,0.018156799674034118
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,1023,0.021083199977874757
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,1023,0.029156801104545594
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,1023,0.02815519869327545
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,1023,0.02627840042114258
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,1023,0.025489598512649536
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,1023,0.026416000723838807
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,1023,0.02603200078010559
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,1023,0.02620159983634949
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,2047,0.022118400037288665
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,2047,0.02036159932613373
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,2047,0.019176000356674196
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,2047,0.01834080070257187
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,2047,0.018225599825382233
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,2047,0.01828159987926483
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,2047,0.018376000225543976
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,2047,0.034220799803733826
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,2047,0.03183520138263703
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,2047,0.02945919930934906
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,2047,0.030457600951194763
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,2047,0.028960001468658448
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,2047,0.029417601227760316
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,2047,0.029385599493980407
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,4095,0.025486400723457335
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,4095,0.02301599979400635
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,4095,0.020759999752044678
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,4095,0.020127999782562255
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,4095,0.019726400077342988
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,4095,0.0203792005777359
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,4095,0.020763200521469117
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,4095,0.04278079867362976
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,4095,0.040561598539352414
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,4095,0.037904000282287596
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,4095,0.037150400876998904
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,4095,0.036664000153541564
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,4095,0.03744960129261017
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,4095,0.03779839873313904
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,8191,0.03068479895591736
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,8191,0.027748799324035643
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,8191,0.024886399507522583
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,8191,0.02271360009908676
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,8191,0.022427199780941008
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,8191,0.02529279887676239
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,8191,0.025868800282478333
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,8191,0.06622719764709473
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,8191,0.06312959790229797
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,8191,0.057892799377441406
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,8191,0.057468801736831665
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,8191,0.05432159900665283
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,8191,0.05486239790916443
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,8191,0.05560960173606873
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,16383,0.042654401063919066
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,16383,0.039243200421333314
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,16383,0.034836798906326294
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,16383,0.029598399996757507
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,16383,0.027190399169921876
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,16383,0.029606398940086365
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,16383,0.030316799879074097
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,16383,0.10390720367431641
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,16383,0.09573439955711364
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,16383,0.09886239767074585
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,16383,0.0946943998336792
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,16383,0.09371680021286011
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,16383,0.09489759802818298
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,16383,0.09713119864463807
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,32767,0.05832800269126892
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,32767,0.054092800617218016
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,32767,0.04954720139503479
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,32767,0.04838559925556183
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,32767,0.04643999934196472
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,32767,0.04499199986457825
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,32767,0.04872640073299408
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,32767,0.1783679962158203
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,32767,0.1727712035179138
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,32767,0.1690448045730591
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,32767,0.1668015956878662
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,32767,0.16816320419311523
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,32767,0.16619520187377929
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,32767,0.16939840316772461
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,65535,0.08682399988174438
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,65535,0.08277279734611512
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,65535,0.07715200185775757
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,65535,0.07408319711685181
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,65535,0.07222399711608887
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,65535,0.07091519832611085
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,65535,0.07364959716796875
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,65535,0.3176127910614014
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,65535,0.31307039260864256
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,65535,0.31058878898620607
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,65535,0.30796959400177004
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,65535,0.3062096118927002
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,65535,0.30794401168823243
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,131071,0.1439695954322815
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,131071,0.13231359720230101
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,131071,0.12664639949798584
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,131071,0.12407040596008301
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,65535,0.307807993888855
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,131071,0.12199840545654297
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,131071,0.12089600563049316
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,131071,0.12330559492111207
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,131071,0.5863855838775635
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,131071,0.582094383239746
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,131071,0.5822879791259765
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,131071,0.5998655796051026
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,131071,0.5802224159240723
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,131071,0.5805376052856446
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,1,0.012889599800109864
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,1,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,1,0.0123648002743721
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,1,0.012216000258922577
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,1,0.01345279961824417
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,1,0.012153600156307221
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,1,0.012319999933242797
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,1,0.019390399754047393
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,1,0.018774400651454925
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,1,0.01828320026397705
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,1,0.01802079975605011
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,1,0.017995199561119078
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,1,0.018110400438308714
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,1,0.018003199994564057
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,3,0.013409599661827087
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,3,0.013036799430847169
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,3,0.012545600533485413
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,131071,0.5814032077789306
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,3,0.012392000108957291
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,3,0.01223680004477501
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,3,0.01228479966521263
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,3,0.012144000083208085
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,3,0.01930239945650101
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,3,0.018385599553585052
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,3,0.018131199479103088
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,3,0.018964800238609313
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,3,0.017897599935531618
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,3,0.017975999414920805
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,3,0.018318399786949158
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,7,0.01348160058259964
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,7,0.01311360001564026
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,7,0.012491200119256973
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,7,0.012263999879360199
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,7,0.012371200323104858
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,7,0.012246400117874146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,7,0.012272000312805176
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,7,0.01907680034637451
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,7,0.018695999681949616
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,7,0.01823199987411499
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,7,0.018219199776649476
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,7,0.01813279986381531
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,7,0.017880000174045563
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,7,0.018060800433158875
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,15,0.013079999387264252
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,15,0.013872000575065612
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,15,0.01279039978981018
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,15,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,15,0.012415999919176102
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,15,0.012408000230789185
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,15,0.01231520026922226
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,15,0.019070400297641753
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,15,0.018801599740982056
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,15,0.01849119961261749
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,15,0.018136000633239745
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,15,0.018054400384426118
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,15,0.018004800379276275
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,15,0.01803999990224838
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,31,0.013582399487495423
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,31,0.013075199723243714
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,31,0.012678399682044983
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,31,0.012438400089740754
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,31,0.012399999797344208
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,31,0.012297599762678146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,31,0.012328000366687774
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,31,0.019225600361824035
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,31,0.018991999328136444
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,31,0.018489600718021394
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,31,0.018055999279022218
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,31,0.01800000071525574
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,31,0.018219199776649476
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,63,0.013500800728797913
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,31,0.018068799376487733
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,63,0.013220800459384919
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,63,0.012708799540996551
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,63,0.012412799894809723
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,63,0.012513600289821625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,63,0.012647999823093415
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,63,0.01239520013332367
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,63,0.01923519968986511
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,63,0.01891999989748001
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,63,0.01844480037689209
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,63,0.018423999845981597
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,63,0.01838400065898895
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,63,0.01815840005874634
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,63,0.018116800487041472
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,127,0.0147599995136261
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,127,0.015116800367832185
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,127,0.014507199823856353
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,127,0.014230400323867798
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,127,0.01446399986743927
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,127,0.014241600036621093
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,127,0.01403679996728897
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,127,0.020319999754428865
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,127,0.02080480009317398
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,127,0.02022559940814972
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,127,0.020897600054740905
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,127,0.019849599897861482
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,127,0.01989919990301132
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,127,0.019998399913311003
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,255,0.018063999712467194
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,255,0.01804800033569336
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,255,0.01764799952507019
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,255,0.017324799299240114
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,255,0.01720159947872162
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,255,0.01727039963006973
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,255,0.01706880033016205
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,255,0.02460319995880127
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,255,0.02396800071001053
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,255,0.023369599878787995
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,255,0.023520000278949738
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,255,0.02314079999923706
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,511,0.020172800123691558
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,255,0.023683199286460878
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,511,0.019574399292469024
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,255,0.02303680032491684
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,511,0.017903999984264375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,511,0.01751520037651062
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,511,0.01724960058927536
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,511,0.018084800243377684
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,511,0.01820800006389618
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,511,0.028764799237251282
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,511,0.028003200888633728
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,511,0.026836800575256347
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,511,0.025697600841522217
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,511,0.02512640058994293
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,511,0.026211199164390565
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,511,0.026529601216316222
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,1023,0.023532800376415253
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,1023,0.019735999405384064
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,1023,0.018454399704933167
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,1023,0.017825600504875184
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,1023,0.0174687996506691
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,1023,0.01815840005874634
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,1023,0.018335999548435213
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,1023,0.03454880118370056
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,1023,0.0298335999250412
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,1023,0.03110080063343048
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,1023,0.029068800806999206
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,1023,0.02831839919090271
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,1023,0.02913439869880676
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,1023,0.02956160008907318
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,2047,0.02289759963750839
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,2047,0.02585119903087616
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,2047,0.02005600035190582
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,2047,0.018827199935913086
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,2047,0.019006399810314177
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,2047,0.019123199582099914
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,2047,0.019687999784946442
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,2047,0.03739359974861145
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,2047,0.043270400166511534
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,2047,0.04183999896049499
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,2047,0.036513599753379825
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,2047,0.03603839874267578
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,2047,0.03622719943523407
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,2047,0.03612639904022217
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,4095,0.03033440113067627
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,4095,0.026372799277305604
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,4095,0.023390400409698486
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,4095,0.021804800629615782
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,4095,0.02330880016088486
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,4095,0.021211199462413788
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,4095,0.023281599581241607
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,4095,0.06587039828300476
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,4095,0.06228960156440735
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,4095,0.05692800283432007
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,4095,0.05261920094490051
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,4095,0.054820799827575685
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,4095,0.05289760231971741
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,4095,0.052446401119232176
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,8191,0.0426367998123169
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,8191,0.03720960021018982
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,8191,0.03317599892616272
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,8191,0.02767840027809143
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,8191,0.02656959891319275
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,8191,0.03001439869403839
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,8191,0.03031519949436188
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,8191,0.10383520126342774
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,8191,0.097598397731781
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,8191,0.09366880059242248
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,8191,0.09359200000762939
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,8191,0.09262239933013916
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,8191,0.09640960097312927
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,8191,0.09462400078773499
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,16383,0.05866559743881226
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,16383,0.0489520013332367
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,16383,0.05302559733390808
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,16383,0.04633280038833618
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,16383,0.04477599859237671
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,16383,0.04841760098934174
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,16383,0.04745599925518036
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,16383,0.17730560302734374
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,16383,0.1722640037536621
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,16383,0.16717439889907837
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,16383,0.16467519998550414
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,16383,0.16942720413208007
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,16383,0.17820639610290528
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,32767,0.08689759969711304
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,16383,0.16697440147399903
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,32767,0.0723039984703064
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,32767,0.08208960294723511
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,32767,0.07282400131225586
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,32767,0.07294399738311767
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,32767,0.07071679830551147
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,32767,0.07414720058441163
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,32767,0.317572808265686
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,32767,0.31240479946136473
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,32767,0.3077631950378418
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,32767,0.36439359188079834
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,32767,0.3048896074295044
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,65535,0.1443951964378357
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,65535,0.13481760025024414
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,65535,0.12494879961013794
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,32767,0.30658080577850344
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,32767,0.3083456039428711
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,65535,0.11962560415267945
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,65535,0.1264847993850708
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,65535,0.12307039499282837
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,65535,0.12292319536209106
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,65535,0.5990159988403321
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,65535,0.5879439830780029
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,65535,0.5787231922149658
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,65535,0.5771088123321533
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,65535,0.7819071769714355
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,65535,0.5800896167755127
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,131071,0.22827839851379395
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,131071,0.25413439273834226
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,131071,0.22281279563903808
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,65535,0.5833615779876709
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,131071,0.24666719436645507
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,131071,0.2214656114578247
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,131071,0.22077760696411133
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,131071,0.21892960071563722
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,131071,1.150222396850586
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,131071,1.1355104446411133
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,1,0.013521599769592284
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,1,0.01316480040550232
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,131071,1.1229184150695801
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,131071,1.124556827545166
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,1,0.012627199292182922
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,1,0.012563200294971466
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,131071,1.1244048118591308
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,1,0.012574400007724761
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,1,0.012379200011491776
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,1,0.019539199769496918
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,1,0.01875839978456497
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,1,0.0123648002743721
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,1,0.018412800133228303
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,1,0.018060800433158875
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,131071,1.8280000686645508
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,1,0.019150400161743165
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,1,0.018300800025463103
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,1,0.018276800215244294
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,3,0.01483200043439865
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,3,0.01308320015668869
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,3,0.012438400089740754
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,3,0.012428800016641617
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,3,0.012355200201272964
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,3,0.012363199889659882
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,3,0.01940000057220459
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,3,0.02442079931497574
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,3,0.019139200448989868
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,3,0.018822400271892546
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,3,0.018321600556373597
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,3,0.018334400653839112
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,3,0.018131199479103088
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,7,0.013711999356746673
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,3,0.034985598921775815
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,7,0.013344000279903411
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,7,0.012697599828243256
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,7,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,7,0.012329600006341934
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,131071,1.1280271530151367
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,7,0.01252640038728714
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,7,0.0237296000123024
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,7,0.01892320066690445
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,7,0.01961120069026947
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,7,0.01855199933052063
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,7,0.018276800215244294
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,7,0.018291200697422027
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,7,0.03027999997138977
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,15,0.013358399271965027
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,15,0.014742399752140044
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,15,0.01266240030527115
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,7,0.018377600610256194
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,15,0.012408000230789185
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,15,0.012529599666595458
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,15,0.0205935999751091
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,15,0.012579199671745301
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,15,0.019334399700164796
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,15,0.02085600048303604
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,15,0.01879040002822876
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,15,0.01830720007419586
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,15,0.018411199748516082
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,15,0.029870399832725526
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,15,0.018449600040912627
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,31,0.013249599933624267
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,31,0.015022400021553039
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,31,0.012852799892425538
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,31,0.012507200241088867
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,31,0.012404800206422806
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,31,0.02035519927740097
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,31,0.01255200058221817
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,31,0.019801600277423857
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,31,0.019204799830913544
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,31,0.018956799805164338
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,31,0.018478399515151976
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,31,0.01842560023069382
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,31,0.018505600094795228
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,31,0.018249599635601042
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,63,0.013711999356746673
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,63,0.013443200290203095
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,63,0.012736000120639801
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,63,0.012520000338554382
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,63,0.012593600153923034
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,63,0.012656000256538392
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,63,0.01255359947681427
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,63,0.01956160068511963
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,63,0.01959040015935898
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,63,0.018952000141143798
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,63,0.018889600038528444
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,63,0.018828800320625304
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,63,0.018729600310325622
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,63,0.01844000071287155
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,127,0.015475200116634369
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,127,0.014713600277900696
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,127,0.014550399780273438
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,127,0.014902399480342865
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,127,0.01435679942369461
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,127,0.014006400108337402
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,127,0.014457599818706512
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,127,0.021798400580883025
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,127,0.021275199949741364
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,127,0.020734399557113647
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,127,0.02093279957771301
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,127,0.02038560062646866
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,127,0.020678399503231047
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,127,0.020636799931526183
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,255,0.018355199694633485
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,255,0.018063999712467194
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,255,0.017871999740600587
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,255,0.017235200107097625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,255,0.017425599694252013
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,255,0.017428800463676453
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,255,0.0174687996506691
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,255,0.026505601406097413
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,255,0.026491200923919676
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,255,0.02545759975910187
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,255,0.02571359872817993
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,255,0.025523200631141663
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,255,0.025577598810195924
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,255,0.02540160119533539
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,511,0.019969600439071655
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,511,0.018484799563884734
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,511,0.023171199858188628
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,511,0.017815999686717987
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,511,0.017505599558353423
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,511,0.018270400166511536
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,511,0.01849920004606247
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,511,0.031472000479698184
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,511,0.02975200116634369
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,511,0.03428800106048584
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,511,0.029256001114845276
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,511,0.028697600960731505
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,511,0.029452800750732422
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,511,0.03017599880695343
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,1023,0.026446399092674256
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,1023,0.02003840059041977
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,1023,0.02316800057888031
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,1023,0.018958400189876556
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,1023,0.01897120028734207
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,1023,0.019067199528217317
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,1023,0.01955839991569519
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,1023,0.04380480051040649
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,1023,0.03731679916381836
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,1023,0.03569439947605133
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,1023,0.03606880009174347
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,1023,0.03648959994316101
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,1023,0.04006080031394958
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,1023,0.03637920022010803
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,2047,0.031700798869133
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,2047,0.025699201226234435
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,2047,0.02205760031938553
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,2047,0.021539199352264404
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,2047,0.02077919989824295
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,2047,0.02096160054206848
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,2047,0.021704000234603883
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,2047,0.0665120005607605
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,2047,0.061508798599243165
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,2047,0.056454402208328244
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,2047,0.05428479909896851
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,2047,0.05279359817504883
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,2047,0.05102880001068115
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,2047,0.05269439816474915
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,4095,0.041591998934745786
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,4095,0.03739840090274811
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,4095,0.03192960023880005
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,4095,0.028784000873565675
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,4095,0.025484800338745117
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,4095,0.027286401391029357
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,4095,0.02717599868774414
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,4095,0.10360480546951294
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,4095,0.0971343994140625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,4095,0.09320480227470399
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,4095,0.09217600226402282
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,4095,0.09188960194587707
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,4095,0.09359679818153381
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,4095,0.0936784029006958
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,8191,0.05722560286521912
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,8191,0.05289919972419739
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,8191,0.045665600895881654
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,8191,0.04771040081977844
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,8191,0.044356799125671385
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,8191,0.04544320106506348
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,8191,0.04830079972743988
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,8191,0.17738560438156128
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,8191,0.16531200408935548
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,8191,0.16698399782180787
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,8191,0.17169920206069947
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,8191,0.1689103960990906
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,8191,0.16416800022125244
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,8191,0.16674400568008424
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,16383,0.0842736005783081
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,16383,0.08172640204429626
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,16383,0.0744383990764618
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,16383,0.07252320051193237
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,16383,0.07060480117797852
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,16383,0.07092159986495972
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,16383,0.0730351984500885
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,16383,0.31709918975830076
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,16383,0.312828803062439
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,16383,0.307856011390686
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,16383,0.3052720069885254
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,16383,0.30460319519042967
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,16383,0.3083408117294312
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,32767,0.13981280326843262
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,32767,0.13090879917144777
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,32767,0.12393280267715454
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,16383,0.3085007905960083
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,32767,0.12127360105514526
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,32767,0.11877440214157105
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,32767,0.1202015995979309
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,32767,0.1234287977218628
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,32767,0.5960207939147949
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,32767,0.5820928096771241
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,32767,0.5864927768707275
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,32767,0.5769760131835937
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,32767,0.5789040088653564
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,32767,0.5820847988128662
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,65535,0.24698081016540527
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,65535,0.2281264066696167
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,65535,0.2210848093032837
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,65535,0.21919519901275636
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,65535,0.2172192096710205
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,32767,0.5823520183563232
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,65535,0.22082240581512452
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,65535,0.2204432010650635
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,65535,1.1291248321533203
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,65535,1.149942398071289
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,65535,1.1276047706604004
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,65535,1.1179967880249024
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,65535,1.1197967529296875
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,65535,1.1273103713989259
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,65535,1.5282575607299804
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,131071,0.42318878173828123
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,131071,0.46289758682250975
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,131071,0.4185296058654785
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,131071,0.4142047882080078
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,131071,0.41377921104431153
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,131071,0.41732158660888674
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,131071,0.5012959957122802
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,1,0.014988799393177033
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,1,0.013420799374580383
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,1,0.012982399761676788
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,1,0.012580800056457519
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,1,0.012614400684833526
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,1,0.012539200484752655
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,1,0.012476799637079239
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,1,0.020713600516319274
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,1,0.01902880072593689
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,1,0.018403199315071107
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,1,0.01820639967918396
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,131071,2.2585615158081054
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,1,0.0181551992893219
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,131071,2.2239871978759767
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,131071,2.2011295318603517
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,131071,2.2186832427978516
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,131071,2.206875228881836
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,1,0.018011200428009033
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,1,0.018540799617767334
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,131071,2.2194671630859375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,3,0.01523520052433014
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,3,0.012342400103807449
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,3,0.012534399330615998
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,3,0.013486400246620178
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,3,0.012585599720478059
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,3,0.020635199546813966
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,3,0.013174399733543396
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,3,0.01897439956665039
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,3,0.018727999925613404
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,3,0.01839040070772171
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,3,0.012665599584579468
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,3,0.01849759966135025
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,3,0.018404799699783325
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,3,0.018244799971580506
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,7,0.014878399670124054
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,7,0.012695999443531036
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,7,0.013540799915790557
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,7,0.012736000120639801
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,7,0.012603199481964112
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,7,0.012289600074291229
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,7,0.012409599870443344
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,7,0.021345600485801697
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,7,0.018916800618171692
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,7,0.018673600256443025
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,7,0.018590399622917177
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,7,0.018249599635601042
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,7,0.018199999630451203
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,7,0.018614399433135986
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,15,0.014894400537014008
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,15,0.01300159990787506
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,15,0.01438560038805008
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,15,0.012699200212955475
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,15,0.012591999769210816
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,15,0.012716799974441528
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,15,0.012601600587368011
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,15,0.02088479995727539
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,15,0.0188400000333786
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,15,0.018691200017929076
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,15,0.020206399261951447
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,15,0.018441599607467652
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,15,0.0182559996843338
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,15,0.01823199987411499
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,31,0.015039999783039094
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,31,0.013622400164604188
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,31,0.012591999769210816
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,31,0.012641599774360657
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,31,0.013172799348831176
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,31,0.012715199589729309
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,31,0.012511999905109405
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,31,0.021059200167655945
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,31,0.02045920044183731
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,31,0.018782399594783783
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,31,0.018667200207710268
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,31,0.0186831995844841
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,31,0.01847199946641922
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,31,0.018764799833297728
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,63,0.015227200090885162
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,63,0.01480640023946762
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,63,0.013124799728393555
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,63,0.01356319934129715
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,63,0.012644800543785095
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,63,0.012984000146389008
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,63,0.01276639997959137
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,63,0.02099519968032837
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,63,0.019555200636386872
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,63,0.019280000030994414
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,63,0.019043199717998505
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,63,0.019057600200176238
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,63,0.018964800238609313
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,127,0.017080000042915343
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,63,0.019289599359035493
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,127,0.015438400208950043
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,127,0.015251199901103973
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,127,0.014569599926471711
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,127,0.014440000057220459
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,127,0.014263999462127686
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,127,0.014323200285434722
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,127,0.024736000597476958
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,127,0.023339200019836425
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,127,0.022951999306678773
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,127,0.022625599801540375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,127,0.02252320051193237
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,127,0.022356800734996796
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,127,0.022574399411678315
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,255,0.019883200526237488
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,255,0.01850239932537079
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,255,0.017774400115013123
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,255,0.01757279932498932
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,255,0.0174687996506691
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,131071,3.691347122192383
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,255,0.017798399925231932
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,255,0.017473599314689635
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,255,0.030432000756263733
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,255,0.028468799591064454
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,255,0.02937600016593933
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,255,0.028060799837112425
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,255,0.02825759947299957
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,255,0.028358399868011475
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,511,0.022993600368499754
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,511,0.027692800760269164
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,511,0.018531200289726258
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,255,0.05154079794883728
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,511,0.01968960016965866
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,511,0.018406400084495546
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,511,0.019732800126075745
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,511,0.019200000166893005
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,511,0.04747360050678253
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,511,0.04054720103740692
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,511,0.03592639863491058
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,511,0.035369598865509035
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,511,0.03635840117931366
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,511,0.036248001456260684
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,511,0.059329599142074585
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,1023,0.03537119925022125
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,1023,0.02218240052461624
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,1023,0.026447999477386474
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,1023,0.020745599269866945
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,1023,0.019896000623703003
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,1023,0.021187199652194975
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,1023,0.033851200342178346
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,1023,0.06912959814071655
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,1023,0.0533407986164093
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,1023,0.06229280233383179
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,1023,0.0523472011089325
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,1023,0.056176000833511354
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,1023,0.08324800133705139
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,1023,0.05168160200119019
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,2047,0.0439983993768692
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,2047,0.03188959956169128
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,2047,0.03777920007705689
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,2047,0.02784479856491089
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,2047,0.03279840052127838
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,2047,0.026118400692939758
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,2047,0.025910401344299318
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,2047,0.10665760040283204
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,2047,0.0929696023464203
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,2047,0.09748799800872802
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,2047,0.09178400039672852
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,2047,0.09311839938163757
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,2047,0.11468960046768188
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,2047,0.09080160260200501
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,4095,0.05332319736480713
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,4095,0.05944960117340088
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,4095,0.04737440049648285
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,4095,0.046881601214408875
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,4095,0.04493280053138733
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,4095,0.04549280107021332
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,4095,0.045552000403404236
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,4095,0.1785920023918152
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,4095,0.16393280029296875
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,4095,0.17060320377349852
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,4095,0.16283520460128784
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,4095,0.1641551971435547
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,4095,0.18831839561462402
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,4095,0.1630239963531494
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,8191,0.08705120086669922
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,8191,0.07238879799842834
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,8191,0.07882879972457886
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,8191,0.06918879747390747
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,8191,0.07208960056304932
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,8191,0.07184960246086121
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,8191,0.07120640277862549
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,8191,0.31979520320892335
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,8191,0.30405759811401367
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,8191,0.3020607948303223
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,8191,0.30355679988861084
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,8191,0.350379204750061
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,8191,0.3081984043121338
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,16383,0.14077759981155397
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,16383,0.12600799798965454
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,16383,0.11985119581222534
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,8191,0.30828640460968015
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,16383,0.1161296010017395
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,16383,0.12224800586700439
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,16383,0.11866879463195801
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,16383,0.12094559669494628
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,16383,0.595084810256958
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,16383,0.5766960144042969
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,16383,0.5838175773620605
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,16383,0.5774064064025879
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,16383,0.7343167781829834
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,16383,0.5732079982757569
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,32767,0.22197279930114747
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,32767,0.2500351905822754
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,32767,0.21555039882659913
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,32767,0.211516809463501
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,32767,0.2234112024307251
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,16383,0.5786272048950195
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,32767,0.21571519374847412
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,32767,0.21597120761871338
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,32767,1.1447199821472167
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,32767,1.1258288383483888
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,32767,1.1156352043151856
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,32767,1.1199328422546386
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,32767,1.1191984176635743
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,32767,1.5777296066284179
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,65535,0.46509599685668945
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,65535,0.41393117904663085
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,65535,0.40801119804382324
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,65535,0.4038671970367432
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,65535,0.40218238830566405
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,32767,1.1159520149230957
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,65535,0.47167201042175294
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,65535,0.4070608139038086
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,131071,0.9132863998413085
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,65535,2.252996826171875
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,65535,2.2180543899536134
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,65535,2.2101247787475584
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,65535,2.2072208404541014
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,65535,2.1877904891967774
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,65535,2.2095903396606444
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,131071,0.7962207794189453
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,131071,0.7893983840942382
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,131071,0.7868959903717041
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,131071,0.7837999820709228
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,131071,0.7869728088378907
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,131071,0.7866127967834473
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,65535,3.5072017669677735
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,131071,4.490911865234375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,1,0.016364799439907075
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,131071,4.37151985168457
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,131071,4.353007888793945
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,1,0.014779199659824372
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,131071,4.3399711608886715
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,131071,4.366363143920898
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,131071,4.3318641662597654
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,1,0.013312000036239623
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,1,0.013537600636482239
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,1,0.013011200726032257
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,1,0.018670399487018586
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,1,0.019531199336051942
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,1,0.021751999855041504
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,1,0.013249599933624267
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,1,0.013214400410652161
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,1,0.020257599651813507
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,1,0.018564799427986146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,1,0.019068799912929535
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,1,0.018719999492168425
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,3,0.01615840047597885
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,3,0.01478559970855713
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,3,0.013532799482345582
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,3,0.012862400710582733
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,3,0.013070400059223174
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,3,0.012967999279499053
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,3,0.013299199938774108
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,3,0.021044799685478212
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,3,0.02112320065498352
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,3,0.019016000628471374
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,3,0.01897760033607483
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,3,0.018721599876880646
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,3,0.018756799399852753
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,3,0.018972800672054292
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,7,0.015996800363063814
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,7,0.013468800485134125
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,7,0.014830400049686433
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,7,0.01372160017490387
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,7,0.01308639943599701
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,7,0.013315199315547943
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,7,0.013592000305652618
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,7,0.021219199895858763
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,7,0.020559999346733093
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,7,0.019083200395107268
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,7,0.018943999707698823
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,7,0.018984000384807586
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,7,0.019443200528621675
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,15,0.015358400344848634
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,7,0.018812799453735353
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,15,0.014747199416160584
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,15,0.013544000685214996
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,15,0.013484799861907959
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,15,0.013129599392414093
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,15,0.013419200479984284
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,15,0.013332800567150116
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,15,0.02170879989862442
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,15,0.02093600034713745
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,15,0.020025600492954255
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,15,0.01902880072593689
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,15,0.01900160014629364
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,15,0.019315199553966524
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,15,0.019211199879646302
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,31,0.01592479944229126
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,31,0.015561600029468537
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,31,0.013792000710964203
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,31,0.013470399379730224
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,31,0.013390399515628815
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,31,0.01342719942331314
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,31,0.01348160058259964
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,31,0.02213120013475418
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,31,0.02062239944934845
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,31,0.020032000541687012
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,31,0.019606399536132812
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,31,0.01942880004644394
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,31,0.019679999351501463
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,63,0.015600000321865082
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,31,0.01988160014152527
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,63,0.01557759940624237
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,63,0.014609600603580474
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,63,0.013984000682830811
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,63,0.013924799859523773
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,63,0.013540799915790557
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,63,0.01422560065984726
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,63,0.023635199666023253
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,63,0.023427200317382813
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,63,0.022193600237369538
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,63,0.02184640020132065
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,63,0.021558399498462676
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,63,0.02194879949092865
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,63,0.022176000475883483
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,127,0.017297600209712983
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,127,0.017123199999332428
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,127,0.016212800145149232
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,127,0.015643200278282164
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,127,0.01560640037059784
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,127,0.015526400506496429
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,127,0.015652799606323244
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,127,0.02791680097579956
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,127,0.02772960066795349
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,127,0.026876801252365114
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,127,0.026374399662017822
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,127,0.026150399446487428
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,131071,6.742183685302734
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,127,0.02609440088272095
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,255,0.020304000377655028
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,255,0.020640000700950623
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,127,0.026388800144195555
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,255,0.01931840032339096
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,255,0.018769599497318268
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,255,0.01854719966650009
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,255,0.01865759938955307
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,255,0.03787679970264435
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,255,0.036697599291801455
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,255,0.03548319935798645
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,255,0.03471519947052002
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,255,0.03527199923992157
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,255,0.03477759957313538
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,255,0.0336656004190445
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,255,0.03556320071220398
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,511,0.03911679983139038
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,511,0.022622400522232057
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,511,0.021027199923992157
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,511,0.021792000532150267
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,511,0.02943840026855469
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,511,0.0221343994140625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,511,0.03354560136795044
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,511,0.07251359820365906
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,511,0.06514880061149597
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,511,0.0522816002368927
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,511,0.052112001180648806
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,511,0.05422719717025757
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,511,0.058651202917099
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,511,0.08062880039215088
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,1023,0.04774399995803833
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,1023,0.029271999001502992
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,1023,0.041529598832130435
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,1023,0.02441119998693466
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,1023,0.034190401434898376
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,1023,0.0247871994972229
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,1023,0.031632000207901
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,1023,0.11047040224075318
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,1023,0.1001695990562439
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,1023,0.09129760265350342
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,1023,0.09201599955558777
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,1023,0.0941103994846344
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,1023,0.1123471975326538
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,1023,0.09201920032501221
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,2047,0.06332319974899292
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,2047,0.05626559853553772
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,2047,0.05021920204162598
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,2047,0.04376479983329773
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,2047,0.04575999975204468
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,2047,0.044424000382423404
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,2047,0.0486735999584198
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,2047,0.1722208023071289
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,2047,0.18356800079345703
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,2047,0.1895807981491089
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,2047,0.1643231987953186
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,2047,0.1636255979537964
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,2047,0.16375999450683593
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,2047,0.1625424027442932
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,4095,0.09133440256118774
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,4095,0.08087360262870788
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,4095,0.07750560045242309
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,4095,0.06782240271568299
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,4095,0.06951040029525757
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,4095,0.06987199783325196
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,4095,0.06994879841804505
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,4095,0.3127919912338257
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,4095,0.32146079540252687
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,4095,0.36436638832092283
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,4095,0.30599839687347413
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,4095,0.30123999118804934
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,4095,0.30431199073791504
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,4095,0.30253279209136963
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,8191,0.14589120149612428
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,8191,0.12882720232009887
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,8191,0.11829279661178589
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,8191,0.12915199995040894
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,8191,0.11614880561828614
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,8191,0.12100479602813721
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,8191,0.12148480415344239
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,8191,0.5801663875579834
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,8191,0.6027040004730224
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,8191,0.5816415786743164
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,8191,0.5721776008605957
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,8191,0.5778848171234131
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,8191,0.5765488147735596
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,8191,0.7050479888916016
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,16383,0.2536448001861572
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,16383,0.2160640001296997
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,16383,0.2245311975479126
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,16383,0.21338241100311278
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,16383,0.21132960319519042
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,16383,0.2159264087677002
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,16383,0.22948639392852782
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,16383,1.1216704368591308
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,16383,1.155887985229492
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,16383,1.1156959533691406
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,16383,1.1154656410217285
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,16383,1.1107279777526855
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,16383,1.569822406768799
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,32767,0.4170656204223633
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,32767,0.40457921028137206
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,32767,0.40783038139343264
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,32767,0.4735568046569824
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,32767,0.40702238082885744
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,16383,1.1160287857055664
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,32767,0.4726255893707275
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,32767,0.4077295780181885
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,32767,2.246299171447754
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,32767,2.201907157897949
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,32767,2.2116655349731444
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,32767,2.1996896743774412
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,32767,2.1894767761230467
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,32767,2.2089584350585936
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,65535,0.7995567798614502
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,65535,0.7907423973083496
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,32767,3.281572723388672
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,65535,0.9184720039367675
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,65535,0.7842351913452148
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,65535,0.7834735870361328
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,65535,0.78712158203125
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,65535,0.7869455814361572
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,1,0.017080000042915343
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,1,0.015996800363063814
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,1,0.014412799477577209
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,1,0.014214399456977844
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,1,0.013814400136470794
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,1,0.013825599849224091
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,1,0.013870400190353394
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,1,0.023287999629974365
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,1,0.02210880070924759
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,1,0.02025440037250519
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,1,0.019836799800395967
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,1,0.02001280039548874
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,1,0.020022399723529816
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,1,0.019974400103092194
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,3,0.01701280027627945
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,3,0.015447999536991119
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,3,0.01451839953660965
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,3,0.014319999516010285
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,3,0.014095999300479889
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,3,0.013967999815940857
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,3,0.014051200449466705
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,3,0.022908799350261688
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,3,0.021657599508762358
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,3,0.020576000213623047
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,3,0.02030239999294281
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,3,0.020168000459671022
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,3,0.01992959976196289
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,3,0.019952000677585603
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,7,0.01674560010433197
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,7,0.01565600037574768
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,65535,4.48089599609375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,7,0.01438080072402954
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,65535,4.429430389404297
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,65535,4.338019180297851
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,65535,4.377825546264648
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,7,0.01446399986743927
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,7,0.01406240016222
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,65535,4.367558288574219
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,65535,4.376023864746093
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,7,0.019976000487804412
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,7,0.014006400108337402
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,7,0.023447999358177186
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,7,0.02216479927301407
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,7,0.014158399403095245
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,7,0.02008160054683685
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,7,0.01982560008764267
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,7,0.019859200716018675
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,15,0.016100800037384032
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,15,0.0169855996966362
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,15,0.01451680064201355
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,7,0.020132799446582795
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,15,0.014528000354766845
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,15,0.013903999328613281
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,15,0.014256000518798828
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,15,0.023292799293994904
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,15,0.022043199837207796
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,15,0.014177599549293518
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,15,0.020420800149440765
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,15,0.020764799416065217
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,15,0.020095999538898467
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,15,0.020529599487781526
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,15,0.020345599949359895
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,31,0.017078399658203125
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,31,0.015667200088500977
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,31,0.014871999621391296
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,31,0.014270399510860444
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,31,0.014363199472427368
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,31,0.014587199687957764
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,31,0.01438080072402954
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,31,0.025116801261901855
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,31,0.023975999653339387
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,31,0.02282399982213974
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,31,0.022300800681114195
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,31,0.022519999742507936
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,31,0.022441600263118745
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,31,0.022516800463199614
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,63,0.01721920073032379
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,63,0.016345599293708803
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,63,0.015460799634456634
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,63,0.015307199954986573
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,63,0.015252800285816192
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,63,0.014643199741840363
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,63,0.01449279934167862
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,63,0.02696479856967926
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,63,0.029135999083518983
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,63,0.02632000148296356
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,63,0.025598400831222536
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,63,0.025785601139068602
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,63,0.02529279887676239
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,63,0.025220799446105956
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,127,0.019726400077342988
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,127,0.018838399648666383
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,127,0.016739200055599212
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,127,0.016739200055599212
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,127,0.016896000504493712
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,127,0.016359999775886536
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,127,0.016315199434757233
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,127,0.038833600282669065
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,127,0.03331199884414673
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,127,0.03518239855766296
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,127,0.03346399962902069
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,127,0.0329584002494812
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,127,0.032734400033950804
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,127,0.03344959914684296
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,65535,6.737670135498047
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,255,0.02380480021238327
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,255,0.023384000360965728
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,255,0.020529599487781526
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,255,0.020161600410938264
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,255,0.019776000082492827
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,255,0.02017119973897934
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,255,0.05899519920349121
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,255,0.054657602310180665
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,255,0.04863359928131104
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,255,0.03478400111198425
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,255,0.0501583993434906
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,255,0.05159199833869934
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,255,0.05287839770317078
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,255,0.04873439967632294
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,511,0.04007680118083954
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,511,0.04523679912090302
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,511,0.026552000641822816
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,511,0.026519998908042908
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,511,0.03203040063381195
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,511,0.025777599215507506
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,511,0.048956799507141116
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,511,0.10425440073013306
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,511,0.1036080002784729
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,511,0.09223359823226929
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,511,0.09717440009117126
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,511,0.09291200041770935
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,511,0.09179040193557739
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,511,0.13264479637145996
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,1023,0.060022401809692386
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,1023,0.05390560030937195
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,1023,0.050672000646591185
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,1023,0.046675199270248414
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,1023,0.04557119905948639
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,1023,0.04458400011062622
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,1023,0.05713760256767273
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,1023,0.16790560483932496
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,1023,0.17638239860534669
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,1023,0.1662368059158325
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,1023,0.17470240592956543
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,1023,0.16371359825134277
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,1023,0.16471199989318847
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,1023,0.21087040901184081
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,2047,0.08066400289535522
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,2047,0.08485119938850402
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,2047,0.07155200242996215
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,2047,0.06998080015182495
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,2047,0.07566559910774232
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,2047,0.06921280026435853
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,2047,0.078302401304245
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,2047,0.31446080207824706
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,2047,0.31499519348144533
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,2047,0.30703680515289306
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,2047,0.3052720069885254
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,2047,0.3034480094909668
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,2047,0.30367839336395264
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,2047,0.3676512002944946
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,4095,0.13600480556488037
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,4095,0.11896480321884155
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,4095,0.1164639949798584
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,4095,0.12369439601898194
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,4095,0.1334432005882263
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,4095,0.11956319808959961
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,4095,0.1320207953453064
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,4095,0.591593599319458
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,4095,0.5762656211853028
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,4095,0.5871151924133301
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,4095,0.5752352237701416
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,4095,0.578656005859375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,4095,0.581063985824585
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,4095,0.7275887966156006
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,8191,0.21398239135742186
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,8191,0.2447727918624878
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,8191,0.2122112035751343
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,8191,0.21991519927978515
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,8191,0.22964799404144287
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,8191,0.23287360668182372
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,8191,0.2215967893600464
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,8191,1.146459197998047
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,8191,1.1379584312438964
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,8191,1.1203776359558106
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,8191,1.118175983428955
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,8191,1.1207551956176758
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,8191,1.5644191741943358
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,16383,0.4121376037597656
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,16383,0.40653119087219236
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,16383,0.4219327926635742
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,16383,0.4654543876647949
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,8191,1.125499153137207
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,16383,0.4021552085876465
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,16383,0.46338558197021484
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,16383,0.41249761581420896
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,16383,2.2529008865356444
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,16383,2.2138927459716795
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,16383,2.205886459350586
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,16383,2.2227344512939453
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,16383,2.2011295318603517
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,16383,2.2038911819458007
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,16383,3.213390350341797
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,32767,0.8033552169799805
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,32767,0.9188480377197266
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,32767,0.7929984092712402
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,32767,0.7852960109710694
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,32767,0.787220811843872
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,32767,0.789408016204834
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,32767,0.7918848037719727
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,1,0.028297600150108338
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,1,0.01774719953536987
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,1,0.015801599621772765
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,1,0.015919999778270723
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,1,0.01573439985513687
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,1,0.015652799606323244
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,1,0.01526239961385727
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,1,0.03436160087585449
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,1,0.02359360009431839
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,1,0.021561600267887115
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,1,0.02176000028848648
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,1,0.021588799357414246
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,1,0.02150239944458008
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,1,0.02152000069618225
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,3,0.027326399087905885
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,3,0.01778080016374588
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,3,0.015542399883270264
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,3,0.01569119989871979
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,3,0.01510400027036667
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,3,0.015643200278282164
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,3,0.015143999457359314
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,3,0.03515680134296417
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,3,0.02340800017118454
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,3,0.02221280038356781
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,3,0.021614399552345277
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,3,0.02125920057296753
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,3,0.021385599672794343
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,3,0.021855999529361726
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,7,0.02744640111923218
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,7,0.01730560064315796
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,7,0.015489600598812103
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,7,0.01566080003976822
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,7,0.015307199954986573
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,32767,4.3976318359375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,7,0.015612800419330598
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,32767,4.384299087524414
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,32767,4.334584045410156
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,32767,4.380867385864258
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,32767,4.3528495788574215
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,7,0.03658719956874847
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,7,0.015323199331760406
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,32767,4.360480117797851
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,7,0.02197919934988022
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,7,0.0217071995139122
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,7,0.02404640018939972
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,7,0.022142399847507478
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,7,0.021844799816608428
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,15,0.01626719981431961
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,15,0.017985600233078002
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,15,0.030156800150871278
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,7,0.02171359956264496
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,15,0.015659199655056
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,15,0.015430399775505066
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,15,0.01594720035791397
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,15,0.015358400344848634
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,15,0.040299201011657716
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,15,0.0244159996509552
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,15,0.025939199328422546
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,15,0.02412319928407669
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,15,0.023819200694561005
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,15,0.02427999973297119
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,15,0.023894399404525757
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,31,0.029291200637817382
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,31,0.017951999604701997
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,31,0.015887999534606935
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,31,0.015451200306415558
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,31,0.01592320054769516
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,31,0.01555359959602356
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,31,0.015292799472808838
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,31,0.02890399992465973
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,31,0.04513759911060333
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,31,0.027084800601005554
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,31,0.026892799139022826
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,31,0.026716798543930054
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,31,0.026368001103401185
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,31,0.026555201411247252
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,63,0.03235679864883423
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,63,0.016673600673675536
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,63,0.01942880004644394
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,63,0.01642560064792633
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,63,0.016225600242614747
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,63,0.01615840047597885
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,63,0.01671359986066818
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,63,0.05117120146751404
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,63,0.03711999952793121
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,63,0.03306399881839752
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,63,0.03391520082950592
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,63,0.03329919874668121
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,63,0.0335072010755539
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,63,0.03293119966983795
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,127,0.03949599862098694
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,127,0.022579200565814972
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,127,0.018958400189876556
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,127,0.018880000710487364
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,127,0.01913599967956543
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,127,0.01892800033092499
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,127,0.018889600038528444
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,127,0.07122240066528321
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,127,0.057392001152038574
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,127,0.0528656005859375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,127,0.04857760071754456
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,127,0.04930399954319
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,127,0.051286399364471436
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,127,0.04800640046596527
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,255,0.04594079852104187
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,255,0.03415679931640625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,255,0.028172799944877626
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,255,0.023947200179100035
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,255,0.02579360008239746
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,255,0.023768000304698944
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,255,0.022788800299167633
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,255,0.10895520448684692
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,255,0.09043999910354614
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,255,0.09043200016021728
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,255,0.08884639739990234
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,255,0.08798400163650513
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,255,0.08823360204696655
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,255,0.08567839860916138
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,511,0.0702015995979309
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,32767,7.636412811279297
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,511,0.05246719717979431
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,511,0.044180798530578616
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,511,0.04437119960784912
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,511,0.04493120014667511
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,511,0.04726560115814209
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,511,0.0407696008682251
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,511,0.16441919803619384
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,511,0.17092159986495972
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,511,0.16604000329971313
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,511,0.16280959844589232
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,511,0.16283680200576783
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,511,0.16230239868164062
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,1023,0.09842879772186279
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,511,0.31943199634552
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,1023,0.07686560153961182
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,1023,0.06807199716567994
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,1023,0.06951839923858642
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,1023,0.07249600291252137
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,1023,0.06859679818153382
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,1023,0.06592159867286682
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,1023,0.3086672067642212
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,1023,0.30380640029907224
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,1023,0.30065441131591797
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,1023,0.30093119144439695
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,1023,0.30132479667663575
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,1023,0.30123999118804934
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,1023,0.4779983997344971
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,2047,0.12464640140533448
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,2047,0.15175199508666992
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,2047,0.11727360486984253
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,2047,0.1196943998336792
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,2047,0.11565279960632324
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,2047,0.11610879898071289
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,2047,0.12479679584503174
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,2047,0.6082816123962402
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,2047,0.5737343788146972
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,2047,0.5726816177368164
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,2047,0.5733776092529297
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,2047,0.5707087993621827
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,2047,0.5806816101074219
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,2047,0.7475776195526123
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,4095,0.2200400114059448
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,4095,0.21485600471496583
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,4095,0.21238560676574708
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,4095,0.21112799644470215
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,4095,0.26236639022827146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,4095,0.22201759815216066
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,4095,0.21017920970916748
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,4095,1.1178848266601562
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,4095,1.1169487953186035
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,4095,1.1555407524108887
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,4095,1.1161855697631835
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,4095,1.1096431732177734
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,4095,1.5678735733032227
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,8191,0.40320639610290526
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,8191,0.40607199668884275
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,8191,0.40147199630737307
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,8191,0.4167183876037598
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,8191,0.4830063819885254
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,4095,1.1140671730041505
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,8191,0.41598238945007326
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,8191,0.40065598487854004
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,8191,2.197854423522949
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,8191,2.204759979248047
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,8191,2.271267127990723
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,8191,2.1931999206542967
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,8191,2.196788787841797
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,8191,2.1898687362670897
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,8191,3.160479927062988
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,16383,0.7808479785919189
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,16383,0.7876543998718262
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,16383,0.7924032211303711
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,16383,0.7844944000244141
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,16383,0.7828559875488281
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,16383,0.9274720191955567
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,16383,0.8179280281066894
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,1,0.10277760028839111
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,1,0.04516319930553436
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,1,0.04002879858016968
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,1,0.03924959897994995
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,1,0.03875359892845154
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,1,0.03901279866695404
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,1,0.030177599191665648
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,1,0.07242559790611267
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,1,0.04341599941253662
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,1,0.03941439986228943
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,1,0.0385919988155365
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,1,0.03246879875659943
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,1,0.032662400603294374
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,1,0.03246400058269501
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,3,0.05721279978752136
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,3,0.03094559907913208
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,3,0.025284799933433532
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,3,0.024644799530506134
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,3,0.024417600035667418
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,3,0.024508799612522125
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,3,0.024191999435424806
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,3,0.06366559863090515
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,3,0.03738240003585815
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,3,0.030692800879478455
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,3,0.030244800448417663
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,16383,4.382270431518554
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,16383,4.483089447021484
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,3,0.030008000135421754
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,16383,4.332632064819336
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,16383,4.388359832763672
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,16383,4.391985702514648
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,16383,4.378643035888672
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,3,0.02905600070953369
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,7,0.05282719731330872
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,16383,4.3748126983642575
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,7,0.028439998626708984
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,3,0.028734400868415833
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,7,0.02234400063753128
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,7,0.022809599339962006
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,7,0.022888000309467315
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,7,0.022313599288463593
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,7,0.039208000898361205
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,7,0.023044799268245698
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,7,0.032385599613189694
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,7,0.031046399474143983
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,7,0.031115201115608216
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,7,0.06484479904174804
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,7,0.031401601433753965
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,15,0.02837440073490143
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,7,0.031038400530815125
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,15,0.022939200699329376
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,15,0.05278720259666443
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,15,0.02335200011730194
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,15,0.02242079973220825
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,15,0.022809599339962006
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,15,0.02287199944257736
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,15,0.06648160219192505
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,15,0.0435808002948761
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,15,0.033817601203918454
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,15,0.033769598603248595
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,15,0.03375039994716644
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,15,0.034771201014518735
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,15,0.03394559919834137
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,31,0.05495679974555969
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,31,0.023476800322532652
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,31,0.03149600028991699
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,31,0.023027199506759643
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,31,0.0225040003657341
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,31,0.02266719937324524
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,31,0.022793599963188173
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,31,0.04216800034046173
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,31,0.05202879905700684
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,31,0.07281759977340699
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,31,0.038689601421356204
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,31,0.041631999611854556
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,31,0.03937920033931732
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,31,0.039208000898361205
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,63,0.05767679810523987
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,63,0.03734239935874939
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,63,0.02375199943780899
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,63,0.024054400622844696
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,63,0.026596799492836
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,63,0.023521600663661955
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,63,0.023867200314998626
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,63,0.06922720074653625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,63,0.06232320070266724
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,63,0.091430401802063
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,63,0.055201601982116696
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,63,0.05834239721298218
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,63,0.05537440180778504
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,63,0.05407040119171143
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,127,0.043828800320625305
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,127,0.06578400135040283
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,127,0.03855679929256439
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,127,0.03519839942455292
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,127,0.03144800066947937
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,127,0.029543998837471008
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,127,0.029758399724960326
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,127,0.10677920579910279
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,127,0.09993759989738464
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,127,0.1291983962059021
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,127,0.09799360036849976
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,127,0.0952351987361908
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,127,0.09504799842834473
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,127,0.09527680277824402
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,255,0.05799679756164551
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,255,0.0523248016834259
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,255,0.07941600084304809
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,255,0.047443199157714847
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,255,0.0494623988866806
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,255,0.04653120040893555
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,255,0.0472463995218277
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,255,0.17781280279159545
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,255,0.17200000286102296
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,255,0.19817279577255248
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,255,0.16856000423431397
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,255,0.16892160177230836
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,255,0.16870880126953125
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,255,0.16784160137176513
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,511,0.1270959973335266
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,511,0.08290079832077027
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,511,0.07514079809188842
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,511,0.07850239872932434
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,511,0.09215360283851623
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,511,0.07565119862556458
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,511,0.07498080134391785
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,511,0.3243103981018066
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,511,0.3603760004043579
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,511,0.31077759265899657
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,511,0.31248319149017334
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,511,0.31773600578308103
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,511,0.3102384090423584
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,511,0.31044321060180663
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,1023,0.14018720388412476
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,1023,0.18281760215759277
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,1023,0.13045120239257812
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,1023,0.12161279916763305
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,1023,0.12556960582733154
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,1023,0.12318559885025024
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,1023,0.12141599655151367
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,1023,0.5885583877563476
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,1023,0.5829071998596191
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,1023,0.5943039894104004
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,1023,0.6413040161132812
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,1023,0.5796527862548828
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,1023,0.5818927764892579
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,1023,0.5807983875274658
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,2047,0.21689920425415038
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,2047,0.23391520977020264
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,2047,0.21521279811859131
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,2047,0.22483038902282715
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,2047,0.22013280391693116
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,2047,0.28931679725646975
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,2047,0.21727681159973145
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,2047,1.1936752319335937
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,2047,1.1178895950317382
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,2047,1.1326592445373536
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,2047,1.1199999809265138
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,2047,1.1415040016174316
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,2047,1.1243488311767578
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,2047,1.1253151893615723
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,4095,0.5283631801605224
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,4095,0.42499518394470215
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,4095,0.40931038856506347
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,4095,0.4068895816802979
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,4095,0.41501760482788086
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,4095,0.40511360168457033
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,4095,0.40558719635009766
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,4095,2.2109167098999025
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,4095,2.224051284790039
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,4095,2.212841606140137
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,4095,2.2171375274658205
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,4095,2.2101696014404295
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,4095,2.2998432159423827
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,4095,2.9395824432373048
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,8191,0.8003696441650391
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,8191,0.78580322265625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,8191,0.7889279842376709
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,8191,0.7940095901489258
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,8191,0.8049776077270507
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,8191,0.9570832252502441
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,8191,0.8078399658203125
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,1,0.09352160096168519
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,1,0.05315679907798767
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,1,0.041300800442695615
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,1,0.03795520067214966
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,1,0.0372079998254776
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,1,0.03684960007667541
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,1,0.03694399893283844
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,1,0.10064159631729126
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,1,0.061326402425765994
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,1,0.05026879906654358
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,1,0.044654399156570435
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,1,0.043647998571395875
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,1,0.04360319972038269
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,1,0.04359839856624603
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,3,0.09281119704246521
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,3,0.05301920175552368
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,3,0.04104000031948089
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,3,0.038083198666572574
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,8191,4.398356628417969
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,8191,4.514039993286133
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,3,0.037308800220489505
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,8191,4.385684967041016
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,8191,4.378496170043945
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,8191,4.39756965637207
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,8191,4.362633514404297
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,3,0.03683040142059326
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,3,0.10326880216598511
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,3,0.05412639975547791
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,3,0.06342399716377259
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,3,0.0369951993227005
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,3,0.04535039961338043
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,3,0.04513440132141113
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,3,0.045500800013542175
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,3,0.04708479940891266
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,7,0.04364640116691589
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,7,0.036950400471687316
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,7,0.037036800384521486
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,7,0.0538640022277832
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,7,0.038104000687599185
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,7,0.03731519877910614
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,7,0.09331679940223694
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,7,0.10660799741744995
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,7,0.06767039895057678
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,7,0.049527999758720395
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,7,0.04803040027618408
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,7,0.05584959983825684
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,7,0.04848639965057373
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,7,0.04839679896831513
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,15,0.042742401361465454
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,15,0.0954688012599945
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,15,0.055307197570800784
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,15,0.03803679943084717
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,15,0.03733760118484497
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,15,0.03726080060005188
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,15,0.037254399061203
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,15,0.07305920124053955
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,15,0.06437119841575623
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,15,0.11442559957504272
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,15,0.056809598207473756
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,15,0.05454559922218323
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,15,0.053881597518920896
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,15,0.05414720177650452
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,31,0.09803680181503296
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,31,0.03842720091342926
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,31,0.04559360146522522
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,31,0.05664160251617432
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,31,0.03707360029220581
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,31,0.03751200139522552
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,31,0.03744640052318573
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,31,0.1336527943611145
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,31,0.08009439706802368
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,31,0.09147040247917175
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,31,0.07086719870567322
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,31,0.07439360022544861
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,31,0.06684479713439942
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,31,0.06871039867401123
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,63,0.06293280124664306
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,63,0.10071519613265992
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,63,0.0520304024219513
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,63,0.04759680032730103
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,63,0.04291200041770935
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,63,0.044756799936294556
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,63,0.041571199893951416
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,63,0.1261296033859253
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,63,0.16511199474334717
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,63,0.11615999937057495
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,63,0.11210559606552124
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,63,0.1068160057067871
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,63,0.10612159967422485
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,63,0.10557279586791993
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,8191,6.5488739013671875
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,127,0.07489280104637146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,127,0.11385760307312012
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,127,0.05841599702835083
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,127,0.05713120102882385
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,127,0.06580320000648499
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,127,0.05611680150032043
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,127,0.08703839778900146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,127,0.19356640577316284
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,127,0.17990880012512206
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,127,0.23406240940093995
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,127,0.17530560493469238
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,127,0.1850783944129944
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,127,0.17810879945755004
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,255,0.10108959674835205
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,255,0.14232800006866456
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,255,0.09042400121688843
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,255,0.08517439961433411
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,255,0.0829967975616455
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,255,0.08272320032119751
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,127,0.2589936017990112
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,255,0.07998719811439514
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,255,0.33368959426879885
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,255,0.3731264114379883
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,255,0.3171760082244873
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,255,0.3160176038742065
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,255,0.3125407934188843
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,255,0.31169440746307375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,255,0.42687201499938965
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,511,0.1702623963356018
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,511,0.13827040195465087
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,511,0.13734560012817382
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,511,0.15167839527130128
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,511,0.143286395072937
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,511,0.23542399406433107
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,511,0.15546239614486695
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,511,0.6077568054199218
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,511,0.5988192081451416
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,511,0.6252799987792969
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,511,0.6912112236022949
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,511,0.7163824081420899
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,511,0.5953616142272949
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,511,0.5926112174987793
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,1023,0.24454081058502197
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,1023,0.23663198947906494
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,1023,0.2517152070999146
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,1023,0.26218879222869873
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,1023,0.3459696054458618
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,1023,0.2292896032333374
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,1023,0.22652800083160402
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,1023,1.1690287590026855
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,1023,1.2418815612792968
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,1023,1.1365376472473145
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,1023,1.1267040252685547
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,1023,1.1326160430908203
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,1023,1.1320511817932128
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,1023,1.5043919563293457
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,2047,0.43282718658447267
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,2047,0.45087199211120604
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,2047,0.4162799835205078
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,2047,0.41928958892822266
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,2047,0.4237199783325195
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,2047,0.565883207321167
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,2047,0.4455999851226807
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,2047,2.2065248489379883
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,4095,1.0303008079528808
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,2047,2.228915214538574
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,2047,2.2271760940551757
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,2047,2.2468496322631837
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,2047,2.3417007446289064
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,2047,2.2203855514526367
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,2047,3.254792022705078
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,4095,0.8485487937927246
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,4095,0.8033424377441406
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,4095,0.796343994140625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,4095,0.888270378112793
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,4095,0.7945504188537598
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,4095,0.8187775611877441
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,1,0.17009119987487792
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,1,0.08961439728736878
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,1,0.07428159713745117
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,1,0.06585119962692261
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,1,0.06171680092811584
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,1,0.060310399532318114
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,1,0.05990719795227051
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,1,0.1722656011581421
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,1,0.09988800287246705
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,1,0.08394399881362916
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,1,0.07626720070838929
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,1,0.06979680061340332
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,1,0.06902239918708801
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,4095,4.624219131469727
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,1,0.06924160122871399
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,3,0.16608480215072632
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,3,0.08998879790306091
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,4095,4.362343978881836
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,4095,4.4037822723388675
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,3,0.07437440156936645
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,4095,4.361576080322266
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,4095,4.382036972045898
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,4095,4.4375968933105465
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,3,0.06602399945259094
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,3,0.061831998825073245
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,3,0.060817599296569824
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,3,0.06016960144042969
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,3,0.0879472017288208
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,3,0.08149759769439698
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,3,0.07350879907608032
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,3,0.18483519554138184
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,3,0.10961439609527587
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,3,0.0721343994140625
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,3,0.07113919854164123
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,7,0.07465440034866333
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,7,0.09372479915618896
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,7,0.060736000537872314
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,7,0.0603007972240448
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,7,0.06573119759559631
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,7,0.16656800508499145
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,7,0.06026880145072937
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,7,0.11463840007781982
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,7,0.09579839706420898
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,7,0.08780320286750794
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,7,0.0822928011417389
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,7,0.07853919863700867
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,7,0.07699040174484253
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,7,0.1884927988052368
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,4095,6.639649963378906
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,15,0.06715999841690064
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,15,0.07602880001068116
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,15,0.09636160135269164
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,15,0.06120160222053528
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,15,0.17069599628448487
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,15,0.06149439811706543
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,15,0.1295232057571411
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,15,0.11231520175933837
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,15,0.20699520111083985
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,15,0.09800480008125305
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,15,0.10203360319137574
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,15,0.09620320200920104
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,15,0.09353280067443848
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,15,0.09636160135269164
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,31,0.0980351984500885
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,31,0.17287039756774902
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,31,0.07808480262756348
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,31,0.06941279768943787
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,31,0.0651423990726471
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,31,0.061799997091293336
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,31,0.09675520062446594
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,31,0.16348320245742798
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,31,0.14273279905319214
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,31,0.12986719608306885
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,31,0.13390719890594482
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,31,0.1262992024421692
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,31,0.2388927936553955
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,63,0.08643040060997009
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,63,0.10916800498962402
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,63,0.07681120038032532
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,63,0.17728159427642823
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,63,0.07174720168113709
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,63,0.07310879826545716
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,31,0.1672335982322693
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,63,0.07153120040893554
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,63,0.2269376039505005
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,63,0.19667840003967285
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,63,0.20559520721435548
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,63,0.19282879829406738
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,63,0.29851679801940917
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,63,0.19250080585479737
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,63,0.2470736026763916
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,127,0.11031359434127808
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,127,0.13130719661712648
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,127,0.09220960140228271
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,127,0.09803839921951293
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,127,0.09290720224380493
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,127,0.10627679824829102
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,127,0.20143840312957764
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,127,0.34464321136474607
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,127,0.362992000579834
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,127,0.3315984010696411
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,127,0.32198879718780515
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,127,0.32458560466766356
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,127,0.4324143886566162
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,127,0.38241119384765626
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,255,0.15652159452438355
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,255,0.17749600410461425
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,255,0.14600000381469727
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,255,0.14056960344314576
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,255,0.14058879613876343
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,255,0.25804159641265867
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,255,0.15728960037231446
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,255,0.6314352035522461
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,255,0.7087423801422119
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,255,0.6102320194244385
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,255,0.5991375923156739
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,255,0.5920959949493408
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,255,0.5914976119995117
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,511,0.4371471881866455
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,255,0.7668799877166748
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,511,0.2741375923156738
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,511,0.24728960990905763
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,511,0.25628321170806884
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,511,0.30881760120391843
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,511,0.25056159496307373
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,511,0.24632799625396729
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,511,1.2184831619262695
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,511,1.1723039627075196
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,511,1.1553728103637695
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,511,1.1564175605773925
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,511,1.1420415878295898
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,511,1.1525343894958495
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,511,1.770047950744629
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,1023,0.4391791820526123
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,1023,0.5036704063415527
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,1023,0.4351791858673096
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,1023,0.46620640754699705
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,1023,0.44991202354431153
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,1023,0.657259178161621
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,1023,0.4767263889312744
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,1023,2.2702463150024412
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,1023,2.430177688598633
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,1023,2.2424127578735353
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,1023,2.2354400634765623
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,1023,2.3039648056030275
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,1023,2.236382484436035
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,1023,2.909220886230469
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,2047,0.8167584419250489
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,2047,0.8283200263977051
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,2047,0.848971176147461
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,2047,0.8110464096069336
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,2047,0.8929344177246094
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,2047,1.1281087875366211
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,2047,0.8519120216369629
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,2047,4.698345565795899
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,2047,4.388334274291992
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,2047,4.3764495849609375
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,2047,4.4516864776611325
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,2047,4.4376575469970705
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,2047,4.3600719451904295
SGLang,0.5.8.post1,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,2047,4.478007888793945
