framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1,1,0,0.1006
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4,1,0,0.1002
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8,1,0,0.1006
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16,1,0,0.1037
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,32,1,0,0.1119
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,64,1,0,0.1107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,128,1,0,0.1092
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,256,1,0,0.1096
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,512,1,0,0.1102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1024,1,0,0.1101
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,2048,1,0,0.1122
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4096,1,0,0.1153
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8192,1,0,0.1198
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16384,1,0,0.1301
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1,1,0,0.1048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4,1,0,0.1046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8,1,0,0.1045
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16,1,0,0.1047
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,32,1,0,0.1145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,64,1,0,0.1117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,128,1,0,0.1105
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,256,1,0,0.1103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,512,1,0,0.1112
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1024,1,0,0.1128
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,2048,1,0,0.1152
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4096,1,0,0.1175
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8192,1,0,0.1233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16384,1,0,0.1322
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1,1,0,0.1043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4,1,0,0.1041
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8,1,0,0.1048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16,1,0,0.1046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,32,1,0,0.1151
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,64,1,0,0.1127
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,128,1,0,0.1121
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,256,1,0,0.1117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,512,1,0,0.1138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1024,1,0,0.1167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,2048,1,0,0.1177
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4096,1,0,0.1264
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8192,1,0,0.1345
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16384,1,0,0.1504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1,1,0,0.1056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1,1,0,0.0671
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4,1,0,0.1055
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4,1,0,0.0666
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8,1,0,0.1055
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8,1,0,0.0674
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16,1,0,0.1065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16,1,0,0.0700
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,32,1,0,0.0789
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,32,1,0,0.1173
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,64,1,0,0.0771
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,64,1,0,0.1152
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,128,1,0,0.0756
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,128,1,0,0.1151
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,256,1,0,0.0752
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,256,1,0,0.1159
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,512,1,0,0.0760
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1024,1,0,0.0796
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,512,1,0,0.1175
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,2048,1,0,0.0806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1024,1,0,0.1197
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4096,1,0,0.0818
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,2048,1,0,0.1272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8192,1,0,0.0870
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16384,1,0,0.0944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4096,1,0,0.1354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8192,1,0,0.1512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16384,1,0,0.1817
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1,1,0,0.1065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4,1,0,0.1071
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,8,1,0,0.1067
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,16,1,0,0.1079
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,32,1,0,0.1199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1,1,0,0.0704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,64,1,0,0.1199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4,1,0,0.0699
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,128,1,0,0.1222
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8,1,0,0.0702
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16,1,0,0.0705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,256,1,0,0.1240
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,32,1,0,0.0794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,64,1,0,0.0776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,128,1,0,0.0759
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,256,1,0,0.0754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,512,1,0,0.0766
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1024,1,0,0.0760
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,2048,1,0,0.0780
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4096,1,0,0.0817
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8192,1,0,0.0876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16384,1,0,0.1008
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1,1,0,0.0712
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4,1,0,0.0699
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8,1,0,0.0708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,512,1,0,0.1238
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16,1,0,0.0709
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,32,1,0,0.0810
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1024,1,0,0.1301
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,64,1,0,0.0776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,128,1,0,0.0769
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,2048,1,0,0.1384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,256,1,0,0.0770
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,512,1,0,0.0771
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1024,1,0,0.0790
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4096,1,0,0.1556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,2048,1,0,0.0819
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4096,1,0,0.0841
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8192,1,0,0.0943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,8192,1,0,0.1861
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16384,1,0,0.1067
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1,1,0,0.0710
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4,1,0,0.0704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8,1,0,0.0703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16,1,0,0.0714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,16384,1,0,0.2476
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,32,1,0,0.0810
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,64,1,0,0.0784
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1,1,0,0.1112
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,128,1,0,0.0779
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,256,1,0,0.0780
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4,1,0,0.1106
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,512,1,0,0.0806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,8,1,0,0.1108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1024,1,0,0.0823
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,16,1,0,0.1114
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,32,1,0,0.1114
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,64,1,0,0.1132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1,1,0,0.1031
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4,1,0,0.1025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,128,1,0,0.1144
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8,1,0,0.1029
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16,1,0,0.1028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,256,1,0,0.1308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,2048,1,0,0.0853
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,32,1,0,0.1032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4096,1,0,0.0960
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,512,1,0,0.1400
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,64,1,0,0.1042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8192,1,0,0.1100
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1024,1,0,0.1475
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,2048,1,0,0.1647
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16384,1,0,0.1338
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1,1,0,0.0716
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4,1,0,0.0722
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,8,1,0,0.0713
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1,1,0,0.0333
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,16,1,0,0.0720
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4,1,0,0.0331
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,32,1,0,0.0828
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8,1,0,0.0334
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,64,1,0,0.0807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16,1,0,0.0353
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,32,1,0,0.0375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,128,1,0,0.0818
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,64,1,0,0.0373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,128,1,0,0.0366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,256,1,0,0.0827
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,128,1,0,0.1059
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,256,1,0,0.0381
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,512,1,0,0.0401
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,512,1,0,0.0848
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1,1,0,0.0374
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,256,1,0,0.1112
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1024,1,0,0.0873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4,1,0,0.0375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,512,1,0,0.1128
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8,1,0,0.0377
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,2048,1,0,0.0979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4096,1,0,0.1952
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1024,1,0,0.1142
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16,1,0,0.0375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,2048,1,0,0.1152
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,32,1,0,0.0377
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4096,1,0,0.1106
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,64,1,0,0.0391
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4096,1,0,0.1203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1,1,0,0.1178
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,128,1,0,0.0408
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8192,1,0,0.1236
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,4,1,0,0.1175
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,8192,1,0,0.1361
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1,1,0,0.0688
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,256,1,0,0.0405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16384,1,0,0.1293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4,1,0,0.0693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,512,1,0,0.0425
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8,1,0,0.0689
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1024,1,0,0.0440
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1,1,0,0.0492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1024,1,0,0.0431
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,2048,1,0,0.0444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4,1,0,0.0493
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,2048,1,0,0.0434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,16384,1,0,0.1834
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4096,1,0,0.0465
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8,1,0,0.0494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4096,1,0,0.0468
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16,1,0,0.0526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8192,1,0,0.0531
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,32,1,0,0.0573
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8192,1,0,0.0510
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16384,1,0,0.0659
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,64,1,0,0.0568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,128,1,0,0.0555
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,256,1,0,0.0556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,512,1,0,0.0577
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1024,1,0,0.0616
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,2048,1,0,0.0631
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4096,1,0,0.0646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8192,1,0,0.0690
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,8,1,0,0.1172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1,1,0,0.0512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1,1,0,0.1034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16384,1,0,0.0763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,16,1,0,0.1171
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4,1,0,0.0500
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4,1,0,0.1034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16,1,0,0.0689
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8,1,0,0.0505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,32,1,0,0.1173
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8,1,0,0.1042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,32,1,0,0.0685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16,1,0,0.0508
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16,1,0,0.1037
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,64,1,0,0.1188
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,64,1,0,0.0704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,32,1,0,0.0497
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1,1,0,0.0358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,32,1,0,0.1037
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,128,1,0,0.0719
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4,1,0,0.0354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,128,1,0,0.1210
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,64,1,0,0.1047
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1,1,0,0.0739
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8,1,0,0.0354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,128,1,0,0.1066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,256,1,0,0.0749
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16,1,0,0.0361
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16384,1,0,0.0602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4,1,0,0.0736
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,256,1,0,0.1531
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,512,1,0,0.0770
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,32,1,0,0.0378
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,256,1,0,0.1117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,8,1,0,0.0734
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1024,1,0,0.0777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,64,1,0,0.0376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,16,1,0,0.0745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,512,1,0,0.1130
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,2048,1,0,0.0797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,128,1,0,0.0376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,32,1,0,0.0852
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4096,1,0,0.0819
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1024,1,0,0.1143
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,64,1,0,0.0863
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1,1,0,0.0536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,2048,1,0,0.1192
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,128,1,0,0.0876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4,1,0,0.0529
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8,1,0,0.0528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16,1,0,0.0534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,32,1,0,0.0574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,64,1,0,0.0528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,128,1,0,0.0534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,256,1,0,0.0556
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1,1,0,0.0368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,512,1,0,0.0578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4,1,0,0.0366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,256,1,0,0.0381
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8,1,0,0.0368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8192,1,0,0.0873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,512,1,0,0.0414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16,1,0,0.0368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,32,1,0,0.0369
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1024,1,0,0.0411
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16384,1,0,0.0940
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,64,1,0,0.0384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4096,1,0,0.1232
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,128,1,0,0.0400
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,256,1,0,0.0404
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8192,1,0,0.1277
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,512,1,0,0.0422
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,64,1,0,0.0572
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1024,1,0,0.0431
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,128,1,0,0.0569
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,2048,1,0,0.0445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,256,1,0,0.0561
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4096,1,0,0.0492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,512,1,0,0.0588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8192,1,0,0.0569
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1024,1,0,0.0588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,512,1,0,0.1666
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1024,1,0,0.0592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16384,1,0,0.0665
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1,1,0,0.0373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,2048,1,0,0.0604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4,1,0,0.0373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4096,1,0,0.0636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1024,1,0,0.1865
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8,1,0,0.0371
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16,1,0,0.0369
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8192,1,0,0.0673
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,2048,1,0,0.0430
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,32,1,0,0.0373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16384,1,0,0.0725
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,64,1,0,0.0388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4096,1,0,0.0481
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,2048,1,0,0.2179
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,128,1,0,0.0403
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8192,1,0,0.0563
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1,1,0,0.0695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,256,1,0,0.0406
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4,1,0,0.0689
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16384,1,0,0.0743
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,512,1,0,0.0437
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16384,1,0,0.1368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1,1,0,0.0354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8,1,0,0.0696
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1024,1,0,0.0448
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1,1,0,0.1031
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4,1,0,0.0350
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16,1,0,0.0692
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,2048,1,0,0.0476
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8,1,0,0.0359
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4,1,0,0.1045
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,32,1,0,0.0697
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,4096,1,0,0.2775
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4096,1,0,0.0561
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16,1,0,0.0355
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8,1,0,0.1035
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,64,1,0,0.0707
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,32,1,0,0.0382
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8192,1,0,0.0651
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16,1,0,0.1040
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,128,1,0,0.0726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,64,1,0,0.0373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,2048,1,0,0.0610
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,32,1,0,0.1035
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,128,1,0,0.0376
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,256,1,0,0.0755
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16384,1,0,0.0793
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,64,1,0,0.1056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,256,1,0,0.0383
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4096,1,0,0.0646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1,1,0,0.1346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,512,1,0,0.0776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,512,1,0,0.0384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,128,1,0,0.1068
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8192,1,0,0.0694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1024,1,0,0.0782
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,256,1,0,0.1141
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,4,1,0,0.1340
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,2048,1,0,0.0806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,256,1,0,0.0910
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,512,1,0,0.1143
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1,1,0,0.0506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16384,1,0,0.0831
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4096,1,0,0.0860
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,8,1,0,0.1348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1024,1,0,0.1177
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4,1,0,0.0510
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,512,1,0,0.0901
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1,1,0,0.0535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8192,1,0,0.0925
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,2048,1,0,0.1216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4,1,0,0.0530
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,16,1,0,0.1351
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1024,1,0,0.1046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4096,1,0,0.1265
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16384,1,0,0.0988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8,1,0,0.0535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1,1,0,0.0701
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16,1,0,0.0531
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,32,1,0,0.1362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,2048,1,0,0.1168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4,1,0,0.0695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,32,1,0,0.0585
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8,1,0,0.0697
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,64,1,0,0.0565
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,64,1,0,0.1392
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16,1,0,0.0695
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,128,1,0,0.0567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4096,1,0,0.1410
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,128,1,0,0.1421
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,32,1,0,0.0699
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,256,1,0,0.0561
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,64,1,0,0.0713
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1,1,0,0.0358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,256,1,0,0.1525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,128,1,0,0.0728
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1,1,0,0.0768
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4,1,0,0.0357
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1024,1,0,0.0411
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,256,1,0,0.0765
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,4,1,0,0.0764
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,2048,1,0,0.0447
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,512,1,0,0.1706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,512,1,0,0.0785
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,8,1,0,0.0763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4096,1,0,0.0539
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1024,1,0,0.0805
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,16,1,0,0.0771
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8192,1,0,0.0668
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,2048,1,0,0.0855
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,32,1,0,0.0774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4096,1,0,0.0904
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16384,1,0,0.0790
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,64,1,0,0.0785
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1,1,0,0.0347
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8192,1,0,0.0972
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,128,1,0,0.0805
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4,1,0,0.0344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8192,1,0,0.1371
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8,1,0,0.0348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16384,1,0,0.1115
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16,1,0,0.0351
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1,1,0,0.0704
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16384,1,0,0.1536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,32,1,0,0.0368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4,1,0,0.0700
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1,1,0,0.1062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8,1,0,0.0698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4,1,0,0.1060
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16,1,0,0.0705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,512,1,0,0.0569
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8,1,0,0.1051
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,32,1,0,0.0697
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1024,1,0,0.0593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16,1,0,0.1063
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,64,1,0,0.0717
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,2048,1,0,0.0612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8,1,0,0.0361
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,32,1,0,0.1064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,128,1,0,0.0732
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16,1,0,0.0363
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,64,1,0,0.1074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,256,1,0,0.0791
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,32,1,0,0.0364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,128,1,0,0.1091
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,64,1,0,0.0378
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1024,1,0,0.2042
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,128,1,0,0.0394
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,256,1,0,0.1193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,256,1,0,0.0414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,512,1,0,0.1198
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,512,1,0,0.0430
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1024,1,0,0.1236
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1024,1,0,0.0473
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,256,1,0,0.0970
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,2048,1,0,0.0561
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,2048,1,0,0.2642
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4096,1,0,0.0638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,512,1,0,0.1089
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,64,1,0,0.0370
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8192,1,0,0.0782
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,128,1,0,0.0363
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4096,1,0,0.0658
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1024,1,0,0.1253
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,1,1,0,0.1798
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,256,1,0,0.0360
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8192,1,0,0.0758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,512,1,0,0.0394
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16384,1,0,0.1053
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8,1,0,0.0497
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1024,1,0,0.0426
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,2048,1,0,0.1512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16384,1,0,0.0880
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16,1,0,0.0502
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1,1,0,0.0361
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1,1,0,0.0531
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,32,1,0,0.0510
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4,1,0,0.0356
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4,1,0,0.0526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,64,1,0,0.0520
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,8,1,0,0.0361
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,2048,1,0,0.1289
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8,1,0,0.0529
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,16,1,0,0.0358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16,1,0,0.0534
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,4096,1,0,0.1998
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,32,1,0,0.0366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4096,1,0,0.1381
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,32,1,0,0.0581
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,64,1,0,0.0380
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1,1,0,0.0853
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,64,1,0,0.0570
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8192,1,0,0.1529
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,128,1,0,0.0398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,128,1,0,0.0567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,256,1,0,0.0437
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,512,1,0,0.0807
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,256,1,0,0.0574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,4,1,0,0.1829
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16384,1,0,0.1797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,512,1,0,0.0487
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,512,1,0,0.0599
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1024,1,0,0.0860
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,2048,1,0,0.0528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1,1,0,0.1076
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1024,1,0,0.0562
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1024,1,0,0.0618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,8,1,0,0.1813
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4096,1,0,0.0668
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,2048,1,0,0.0906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,128,1,0,0.0538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4,1,0,0.1075
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,2048,1,0,0.0639
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,2048,1,0,0.0644
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8192,1,0,0.0800
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4096,1,0,0.0980
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,4,1,0,0.0850
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,256,1,0,0.0567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,16,1,0,0.1824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,8,1,0,0.1078
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4096,1,0,0.0770
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,512,1,0,0.0585
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,8,1,0,0.0850
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8192,1,0,0.1121
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16384,1,0,0.1033
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1024,1,0,0.0601
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,16,1,0,0.1075
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8192,1,0,0.0902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,32,1,0,0.1830
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,16,1,0,0.0850
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,2048,1,0,0.0617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1,1,0,0.0344
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16384,1,0,0.1383
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,32,1,0,0.1079
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16384,1,0,0.1136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4,1,0,0.0347
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4096,1,0,0.0656
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,32,1,0,0.0857
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,64,1,0,0.1090
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,8,1,0,0.0342
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1,1,0,0.0709
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1,1,0,0.0538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8192,1,0,0.0691
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,64,1,0,0.0868
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,16,1,0,0.0353
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4,1,0,0.0541
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4,1,0,0.0707
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16384,1,0,0.0759
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,32,1,0,0.0375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,8,1,0,0.0529
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,128,1,0,0.0889
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,8,1,0,0.0711
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1,1,0,0.0507
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,64,1,0,0.0362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,16,1,0,0.0542
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,16,1,0,0.0711
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4,1,0,0.0511
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,128,1,0,0.0370
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,256,1,0,0.1243
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,32,1,0,0.0596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8,1,0,0.0511
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,32,1,0,0.0712
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,256,1,0,0.0393
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,64,1,0,0.0584
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16,1,0,0.0512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,64,1,0,0.0728
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,512,1,0,0.0439
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,128,1,0,0.0593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,512,1,0,0.1452
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4096,1,0,0.0792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,128,1,0,0.0741
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1024,1,0,0.0527
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,256,1,0,0.0608
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,256,1,0,0.0850
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,512,1,0,0.0624
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,2048,1,0,0.0647
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,8192,1,0,0.1035
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,512,1,0,0.0872
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1024,1,0,0.0657
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1024,1,0,0.1714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4096,1,0,0.0773
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1024,1,0,0.0912
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,2048,1,0,0.0760
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,2048,1,0,0.0986
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4096,1,0,0.0887
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,8192,1,0,0.1019
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,16384,1,0,0.1546
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4096,1,0,0.1137
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,2048,1,0,0.2203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,128,1,0,0.1110
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,8192,1,0,0.1130
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,256,1,0,0.1237
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1,1,0,0.0367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,16384,1,0,0.1500
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,8192,1,0,0.1386
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4,1,0,0.0371
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,512,1,0,0.1261
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,1,1,0,0.1032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,8,1,0,0.0368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1024,1,0,0.1326
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,16,1,0,0.0372
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1,1,0,0.0356
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,16384,1,0,0.1612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,4,1,0,0.1044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,32,1,0,0.0373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,2048,1,0,0.1402
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,32,1,0,0.0515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,64,1,0,0.0391
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,8,1,0,0.1045
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,64,1,0,0.0526
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,16384,1,0,0.1898
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1,1,0,0.0546
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,128,1,0,0.0423
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4096,1,0,0.1546
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,128,1,0,0.0544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4,1,0,0.0550
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,256,1,0,0.0498
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,16,1,0,0.1049
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,256,1,0,0.0578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,8,1,0,0.0541
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1,1,0,0.0737
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,512,1,0,0.0579
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,32,1,0,0.1072
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,8192,1,0,0.1827
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,16,1,0,0.0550
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4,1,0,0.0742
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1024,1,0,0.0654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,32,1,0,0.0612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,64,1,0,0.1906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,8,1,0,0.0740
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,64,1,0,0.1113
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,64,1,0,0.0614
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,16,1,0,0.0746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,2048,1,0,0.0790
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,128,1,0,0.0629
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,32,1,0,0.0746
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,128,1,0,0.2002
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,128,1,0,0.1179
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,16384,1,0,0.2398
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,256,1,0,0.0659
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4096,1,0,0.1044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,64,1,0,0.0759
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4,1,0,0.0352
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,256,1,0,0.1338
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,256,1,0,0.2158
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,128,1,0,0.0777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1,1,0,0.1109
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,8,1,0,0.0353
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,512,1,0,0.0664
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,16,1,0,0.0360
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,256,1,0,0.0917
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1024,1,0,0.0806
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,32,1,0,0.0383
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,512,1,0,0.0600
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,512,1,0,0.1599
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,512,1,0,0.0951
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,64,1,0,0.0390
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,512,1,0,0.2510
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1024,1,0,0.0616
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,2048,1,0,0.0926
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,128,1,0,0.0405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,2048,1,0,0.0646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1024,1,0,0.1038
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,256,1,0,0.0457
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4096,1,0,0.0682
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4096,1,0,0.1177
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,512,1,0,0.0550
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,2048,1,0,0.1160
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8192,1,0,0.0747
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,1024,1,0,0.2101
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1024,1,0,0.0674
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,1024,1,0,0.3170
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1,1,0,0.0573
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16384,1,0,0.0892
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,2048,1,0,0.0802
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4096,1,0,0.1412
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,4,1,0,0.0565
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1,1,0,0.0516
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1,1,0,0.0388
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,1,1,0,0.1698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4,1,0,0.1106
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4,1,0,0.0510
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,1,1,0,0.3068
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4096,1,0,0.1025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,8,1,0,0.0566
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1,1,0,0.0774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,4,1,0,0.0389
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8,1,0,0.0514
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,8,1,0,0.1108
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,4,1,0,0.0783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16,1,0,0.0509
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,16,1,0,0.0568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,8,1,0,0.0385
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1,1,0,0.0364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,4,1,0,0.1714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,4,1,0,0.3103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,16,1,0,0.1110
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,32,1,0,0.0512
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,8,1,0,0.0776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,32,1,0,0.0576
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,16,1,0,0.0390
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,32,1,0,0.1110
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,64,1,0,0.0530
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,4,1,0,0.0358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,8,1,0,0.1724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,16,1,0,0.0782
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,8,1,0,0.3114
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,64,1,0,0.0585
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,128,1,0,0.0550
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,32,1,0,0.0397
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,64,1,0,0.1130
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,8,1,0,0.0366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,256,1,0,0.0592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,32,1,0,0.0784
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,128,1,0,0.0602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,16,1,0,0.1732
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,64,1,0,0.0428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,128,1,0,0.1136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,16,1,0,0.0366
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,16,1,0,0.3087
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,512,1,0,0.0611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,64,1,0,0.0795
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,256,1,0,0.0708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,128,1,0,0.0461
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,256,1,0,0.1326
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,32,1,0,0.0375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,32,1,0,0.1753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,128,1,0,0.0815
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1024,1,0,0.0639
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,256,1,0,0.0600
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,512,1,0,0.1364
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,2048,1,0,0.0675
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,256,1,0,0.1008
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,512,1,0,0.0674
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,64,1,0,0.1822
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,512,1,0,0.0816
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1024,1,0,0.1448
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,32,1,0,0.3123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4096,1,0,0.0744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,512,1,0,0.1091
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1024,1,0,0.0804
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8192,1,0,0.0889
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,2048,1,0,0.1593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,128,1,0,0.1969
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1024,1,0,0.1216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,64,1,0,0.0395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,64,1,0,0.3231
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,2048,1,0,0.1058
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16384,1,0,0.1162
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4096,1,0,0.1882
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,128,1,0,0.0434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1,1,0,0.0515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,2048,1,0,0.1472
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,256,1,0,0.2242
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,128,1,0,0.3409
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,256,1,0,0.0575
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4,1,0,0.0520
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,4096,1,0,0.1565
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,8,1,0,0.0522
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1,1,0,0.0456
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,4096,1,0,0.1994
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,256,1,0,0.3758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,512,1,0,0.2774
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,16,1,0,0.0522
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,4,1,0,0.0454
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1,1,0,0.1183
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,32,1,0,0.0528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,8,1,0,0.0464
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1,1,0,0.0870
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1024,1,0,0.0973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,64,1,0,0.0544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,1,1,0,0.3049
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,512,1,0,0.0685
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,16,1,0,0.0470
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,4,1,0,0.1185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,4,1,0,0.0871
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,128,1,0,0.0564
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,2048,1,0,0.1230
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,32,1,0,0.0490
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,8,1,0,0.0871
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1024,1,0,0.0849
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,8,1,0,0.1179
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,256,1,0,0.0625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,4,1,0,0.3095
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,16,1,0,0.0874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,64,1,0,0.0522
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,512,1,0,0.0651
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,2048,1,0,0.1072
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,4096,1,0,0.1713
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,16,1,0,0.1183
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,512,1,0,0.4441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,32,1,0,0.0870
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,128,1,0,0.0570
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1024,1,0,0.0681
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1,1,0,0.0614
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,32,1,0,0.1176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,4096,1,0,0.1586
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,64,1,0,0.0897
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,256,1,0,0.0692
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,2048,1,0,0.0758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,4,1,0,0.0611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,64,1,0,0.1198
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,8,1,0,0.3088
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,128,1,0,0.0914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,1,1,0,0.5602
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,512,1,0,0.0822
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4096,1,0,0.0898
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,8,1,0,0.0612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,128,1,0,0.1216
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,256,1,0,0.1052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,8192,1,0,0.1152
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,16,1,0,0.0609
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,256,1,0,0.1306
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,16,1,0,0.3084
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,512,1,0,0.1198
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,32,1,0,0.0622
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1,1,0,0.0431
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,16384,1,0,0.1651
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,4,1,0,0.5638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,512,1,0,0.1378
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,64,1,0,0.0634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,4,1,0,0.0426
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1024,1,0,0.1075
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1,1,0,0.0543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1024,1,0,0.1445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,32,1,0,0.3122
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,128,1,0,0.0661
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,8,1,0,0.0445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1024,1,0,0.1528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4,1,0,0.0546
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,8,1,0,0.5652
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,2048,1,0,0.1581
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,2048,1,0,0.1960
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,16,1,0,0.0438
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,256,1,0,0.0894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,64,1,0,0.3254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,8,1,0,0.0543
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,2048,1,0,0.1814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,1,1,0,0.0620
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,32,1,0,0.0481
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,512,1,0,0.1085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,1,1,0,0.1066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,16,1,0,0.0548
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,4,1,0,0.0618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,16,1,0,0.5659
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,64,1,0,0.0489
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1024,1,0,0.1340
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,4,1,0,0.1068
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,4096,1,0,0.2397
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,32,1,0,0.0548
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,8,1,0,0.0620
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,128,1,0,0.3507
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,128,1,0,0.0538
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,64,1,0,0.0571
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,8,1,0,0.1068
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,16,1,0,0.0620
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,2048,1,0,0.1836
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1,1,0,0.1363
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,128,1,0,0.0583
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,256,1,0,0.0731
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,16,1,0,0.1078
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,256,1,0,0.0670
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,32,1,0,0.5693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,4,1,0,0.1355
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,512,1,0,0.0701
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,512,1,0,0.0922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,8,1,0,0.1362
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1024,1,0,0.0777
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,256,1,0,0.4029
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,16,1,0,0.1360
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,1,1,0,0.0745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1024,1,0,0.1167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,32,1,0,0.1373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,2048,1,0,0.0908
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,4,1,0,0.0749
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,64,1,0,0.5909
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,64,1,0,0.1401
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,8,1,0,0.0745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4096,1,0,0.1162
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,128,1,0,0.1446
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,16,1,0,0.0754
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,2048,1,0,0.1652
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,256,1,0,0.1635
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,32,1,0,0.0772
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1,1,0,0.0567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,4,1,0,0.0568
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,1,1,0,0.0570
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,128,1,0,0.6231
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,512,1,0,0.1799
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,8,1,0,0.0570
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,32,1,0,0.0641
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,4,1,0,0.0578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,16,1,0,0.0571
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,32,1,0,0.0578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,64,1,0,0.0690
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,32,1,0,0.1088
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1024,1,0,0.2100
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,64,1,0,0.0596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,128,1,0,0.0761
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,64,1,0,0.1142
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,128,1,0,0.0615
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,256,1,0,0.0735
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,128,1,0,0.1214
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,256,1,0,0.0980
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,512,1,0,0.0808
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,256,1,0,0.6938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,2048,1,0,0.2665
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,256,1,0,0.1483
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1024,1,0,0.0942
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,512,1,0,0.1242
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,1,1,0,0.1859
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,2048,1,0,0.1195
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,512,1,0,0.1753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,64,1,0,0.0799
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,4,1,0,0.1866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,8,1,0,0.1871
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,128,1,0,0.0869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,1024,1,0,0.1735
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,8,1,0,0.0584
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,16,1,0,0.1878
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,4096,1,0,0.1713
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,256,1,0,0.1025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,16,1,0,0.0590
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,1024,1,0,0.2258
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,1,1,0,0.0910
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,32,1,0,0.0618
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1,1,0,0.0634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,512,1,0,0.1279
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,4,1,0,0.0632
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,1,1,0,0.1785
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,8,1,0,0.0634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,16,1,0,0.0634
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,4,1,0,0.1794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,32,1,0,0.0642
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,64,1,0,0.0668
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,1024,1,0,0.1773
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,8,1,0,0.1796
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,128,1,0,0.0696
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,16,1,0,0.1812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,256,1,0,0.0814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,1,1,0,0.1071
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,32,1,0,0.1829
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,512,1,0,0.0951
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,4,1,0,0.1083
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,64,1,0,0.0646
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,64,1,0,0.1899
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,8,1,0,0.1097
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1024,1,0,0.1204
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,128,1,0,0.0720
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,16,1,0,0.1124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,128,1,0,0.2034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,256,1,0,0.0869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,32,1,0,0.1152
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,2048,1,0,0.1708
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,512,1,0,0.1117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,256,1,0,0.2520
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,64,1,0,0.1218
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,1,1,0,0.0779
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,4,1,0,0.0780
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,32,1,0,0.1893
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,128,1,0,0.1354
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,8,1,0,0.0779
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,1024,1,0,0.1612
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,64,1,0,0.1937
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,4,1,0,0.0911
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,512,1,0,0.3059
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,256,1,0,0.1625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,128,1,0,0.2023
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,8,1,0,0.0915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,1,1,0,0.0827
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,16,1,0,0.0919
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,1,1,0,0.3223
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,256,1,0,0.2346
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,32,1,0,0.0945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,512,1,0,0.2143
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,4,1,0,0.3258
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,512,1,0,0.2660
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,64,1,0,0.1043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,1,1,0,0.1888
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,128,1,0,0.1186
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,8,1,0,0.3233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,16,1,0,0.0785
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,1024,1,0,0.3236
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,4,1,0,0.1908
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,32,1,0,0.0787
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,16,1,0,0.3263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,256,1,0,0.1550
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,64,1,0,0.0835
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,4,1,0,0.0838
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,1,1,0,0.3187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,8,1,0,0.1929
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,128,1,0,0.0910
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,32,1,0,0.3271
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,8,1,0,0.0842
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,4,1,0,0.3209
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,256,1,0,0.1146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,512,1,0,0.2086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,16,1,0,0.1957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,16,1,0,0.0863
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,8,1,0,0.3260
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,64,1,0,0.3387
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,32,1,0,0.0887
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,512,1,0,0.1413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,1,1,0,0.1424
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,32,1,0,0.2005
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,16,1,0,0.3267
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,64,1,0,0.0968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,4,1,0,0.1434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,32,1,0,0.3286
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,1024,1,0,0.1911
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,128,1,0,0.1102
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,64,1,0,0.2106
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,64,1,0,0.3348
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,8,1,0,0.1455
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,1,1,0,0.1140
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,256,1,0,0.1365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,4,1,0,0.1145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,128,1,0,0.2326
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,128,1,0,0.3498
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,16,1,0,0.1468
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,8,1,0,0.1154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,512,1,0,0.1881
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,32,1,0,0.1520
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,16,1,0,0.1172
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,256,1,0,0.4099
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,32,1,0,0.1205
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,256,1,0,0.2866
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,128,1,0,0.3637
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,1,1,0,0.1282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,64,1,0,0.1680
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,64,1,0,0.1291
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,4,1,0,0.1295
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,512,1,0,0.4720
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,128,1,0,0.1429
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,128,1,0,0.1898
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,8,1,0,0.1330
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,256,1,0,0.4518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,256,1,0,0.1847
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,1,1,0,0.5845
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,16,1,0,0.1368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,256,1,0,0.2537
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,4,1,0,0.5900
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,512,1,0,0.2367
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,32,1,0,0.1447
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,8,1,0,0.5985
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,1,1,0,0.2023
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,64,1,0,0.1548
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,4,1,0,0.2032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,16,1,0,0.6034
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,128,1,0,0.1759
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,8,1,0,0.2050
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,32,1,0,0.6047
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,16,1,0,0.2066
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,256,1,0,0.2292
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,64,1,0,0.6160
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,32,1,0,0.2101
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,64,1,0,0.2229
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,128,1,0,0.6416
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,128,1,0,0.2460
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,256,1,0,0.7579
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,256,1,0,0.3186
