framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1,1,0,0.5929
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1,1,0,0.6258
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4,1,0,0.5807
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8,1,0,0.5854
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16,1,0,0.5717
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4,1,0,0.6261
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,32,1,0,0.5771
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8,1,0,0.6112
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,64,1,0,0.5699
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16,1,0,0.5917
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,128,1,0,0.5627
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,32,1,0,0.6050
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,256,1,0,0.5765
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,64,1,0,0.5914
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,512,1,0,0.6209
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,128,1,0,0.5981
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1024,1,0,0.7636
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,256,1,0,0.6189
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,512,1,0,0.6573
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,2048,1,0,1.2043
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1024,1,0,0.7894
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,2048,1,0,1.2193
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4096,1,0,2.4086
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4096,1,0,2.4102
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8192,1,0,6.7538
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8192,1,0,6.4380
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16384,1,0,21.1657
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1,1,0,0.5735
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16384,1,0,21.0162
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4,1,0,0.5778
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1,1,0,0.6046
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8,1,0,0.5632
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4,1,0,0.6166
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16,1,0,0.5702
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8,1,0,0.6087
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,32,1,0,0.5681
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16,1,0,0.6112
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,64,1,0,0.5672
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,32,1,0,0.5995
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,128,1,0,0.5815
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,64,1,0,0.6133
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,256,1,0,0.6261
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,128,1,0,0.6206
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,512,1,0,0.7388
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,256,1,0,0.6726
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1024,1,0,1.0875
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,512,1,0,0.7719
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1024,1,0,1.1219
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,2048,1,0,1.9734
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,2048,1,0,1.9897
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4096,1,0,4.6250
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4096,1,0,4.6860
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8192,1,0,13.6416
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8192,1,0,13.5498
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16384,1,0,42.9967
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16384,1,0,42.1741
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1,1,0,0.5914
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1,1,0,0.6064
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4,1,0,0.5852
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4,1,0,0.6244
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8,1,0,0.5731
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8,1,0,0.6074
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16,1,0,0.5770
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16,1,0,0.6112
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,32,1,0,0.5843
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,32,1,0,0.6085
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,64,1,0,0.5904
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,64,1,0,0.6189
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,128,1,0,0.6356
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,128,1,0,0.6713
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,256,1,0,0.7265
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,256,1,0,0.7653
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,512,1,0,1.0451
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,512,1,0,1.0690
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1024,1,0,1.7618
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1024,1,0,1.7747
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,2048,1,0,3.8133
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,2048,1,0,3.8134
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4096,1,0,9.6274
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4096,1,0,9.7168
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8192,1,0,27.1615
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8192,1,0,26.8860
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16384,1,0,85.6383
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1,1,0,0.6228
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16384,1,0,87.0472
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4,1,0,0.6216
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1,1,0,0.6028
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8,1,0,0.6180
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16,1,0,0.6150
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4,1,0,0.5810
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,32,1,0,0.6317
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,64,1,0,0.6872
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,128,1,0,0.7750
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,256,1,0,1.0431
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,512,1,0,1.6775
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8,1,0,0.5713
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16,1,0,0.5728
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,32,1,0,0.5835
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,64,1,0,0.6355
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,128,1,0,0.7141
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,256,1,0,1.0221
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,512,1,0,1.6605
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1024,1,0,3.3472
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1024,1,0,3.3733
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,2048,1,0,7.4291
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,2048,1,0,7.7828
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4096,1,0,19.1061
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4096,1,0,19.2501
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8192,1,0,57.1754
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8192,1,0,57.0492
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16384,1,0,174.8555
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1,1,0,0.6374
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4,1,0,0.6256
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,8,1,0,0.6407
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,16,1,0,0.6444
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,32,1,0,0.6825
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,64,1,0,0.7976
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16384,1,0,177.9952
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,128,1,0,1.0503
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1,1,0,0.6122
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1,1,0,0.5986
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,256,1,0,1.6474
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4,1,0,0.6023
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,8,1,0,0.6037
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,16,1,0,0.6206
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1,1,0,0.6356
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,32,1,0,0.6563
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,512,1,0,3.1758
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,64,1,0,0.7745
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,128,1,0,1.0474
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,256,1,0,1.6360
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1024,1,0,6.5344
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,512,1,0,3.2604
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1024,1,0,6.7209
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,2048,1,0,15.4008
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4,1,0,0.5970
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8,1,0,0.5832
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16,1,0,0.5868
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4,1,0,0.6237
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,32,1,0,0.5866
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8,1,0,0.6140
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,64,1,0,0.5872
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16,1,0,0.6174
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,128,1,0,0.5738
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,2048,1,0,15.3901
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,32,1,0,0.6331
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,256,1,0,0.5876
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,64,1,0,0.6177
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,512,1,0,0.6080
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1024,1,0,0.6362
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,128,1,0,1.0331
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,256,1,0,0.6158
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,512,1,0,0.6488
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1024,1,0,0.6922
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,2048,1,0,0.8762
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4096,1,0,1.3721
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,2048,1,0,0.8418
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8192,1,0,3.2778
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4096,1,0,1.3544
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8192,1,0,3.2729
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16384,1,0,10.4665
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1,1,0,0.6748
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4,1,0,0.6257
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8,1,0,0.6215
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16,1,0,0.6321
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,32,1,0,0.6252
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16384,1,0,10.1052
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,64,1,0,0.6226
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1,1,0,0.5740
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,128,1,0,0.6298
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4,1,0,0.5808
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,256,1,0,0.6404
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8,1,0,0.5766
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,512,1,0,0.6896
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16,1,0,0.5800
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1024,1,0,0.8313
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4096,1,0,40.3750
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,2048,1,0,1.1641
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1,1,0,0.7625
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4,1,0,0.7298
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4096,1,0,2.4493
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,8,1,0,0.7576
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,16,1,0,0.7939
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,32,1,0,0.9119
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,32,1,0,0.5824
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,64,1,0,1.1540
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,64,1,0,0.5763
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,128,1,0,0.6481
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,128,1,0,1.6584
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,256,1,0,0.6133
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8192,1,0,6.5749
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,512,1,0,0.6474
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1024,1,0,0.7832
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,256,1,0,3.0540
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,2048,1,0,1.1415
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4096,1,0,2.4386
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4096,1,0,39.9276
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1,1,0,0.7220
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,512,1,0,6.1577
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4,1,0,0.6567
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,8,1,0,0.6643
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,16,1,0,0.7353
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,32,1,0,0.8411
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8192,1,0,6.5111
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,64,1,0,1.1120
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,128,1,0,1.6470
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,256,1,0,3.0567
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16384,1,0,20.7144
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1024,1,0,13.4419
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,512,1,0,6.2311
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1,1,0,0.6161
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4,1,0,0.6237
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8,1,0,0.6163
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16,1,0,0.6106
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16384,1,0,20.5230
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,32,1,0,0.6278
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1,1,0,0.5854
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,64,1,0,0.6251
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4,1,0,0.5869
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,128,1,0,0.6345
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8,1,0,0.5829
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,256,1,0,0.6940
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16,1,0,0.5830
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,512,1,0,0.7894
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,32,1,0,0.5891
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1024,1,0,1.0622
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,64,1,0,0.5921
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1024,1,0,13.5266
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,128,1,0,0.6152
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,256,1,0,0.6572
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,2048,1,0,2.0275
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,512,1,0,0.7524
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1024,1,0,1.0413
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,2048,1,0,2.0224
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4096,1,0,4.5898
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4096,1,0,4.6802
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,2048,1,0,30.8635
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8192,1,0,13.4912
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8192,1,0,13.4123
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,2048,1,0,30.6691
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16384,1,0,42.1989
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1,1,0,0.6303
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4,1,0,0.6399
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8,1,0,0.6334
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16,1,0,0.6188
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16384,1,0,41.9512
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,32,1,0,0.6270
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1,1,0,0.6035
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,64,1,0,0.6417
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4,1,0,0.6053
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,128,1,0,0.6956
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8,1,0,0.5970
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,256,1,0,0.7871
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16,1,0,0.5925
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,512,1,0,1.0215
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,32,1,0,0.6013
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,64,1,0,0.6168
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,128,1,0,0.6615
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,256,1,0,0.7521
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,512,1,0,1.0039
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1024,1,0,1.8134
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1024,1,0,1.8269
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,2048,1,0,3.8446
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,2048,1,0,3.7994
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4096,1,0,9.6437
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4096,1,0,79.0372
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4096,1,0,9.5445
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1,1,0,0.9251
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,4,1,0,0.9417
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,8,1,0,0.9924
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,16,1,0,1.1142
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,32,1,0,1.2932
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,64,1,0,1.7933
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,128,1,0,3.0683
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,256,1,0,5.9435
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4096,1,0,80.4987
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1,1,0,0.8466
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,4,1,0,0.8549
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,8,1,0,0.9054
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,16,1,0,1.0132
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,32,1,0,1.2558
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8192,1,0,26.9775
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,64,1,0,1.7788
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8192,1,0,26.6137
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,512,1,0,12.4716
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,128,1,0,3.1075
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,256,1,0,6.0123
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,512,1,0,12.5716
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1024,1,0,26.9314
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1024,1,0,26.9943
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16384,1,0,85.8027
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1,1,0,0.6542
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4,1,0,0.6417
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,8,1,0,0.6386
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16384,1,0,86.8709
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,16,1,0,0.6480
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,32,1,0,0.6667
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1,1,0,0.6152
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,64,1,0,0.7156
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4,1,0,0.6087
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,128,1,0,0.8059
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,8,1,0,0.6156
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,256,1,0,1.0052
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,16,1,0,0.6039
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,32,1,0,0.6544
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,2048,1,0,62.5106
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,64,1,0,0.6750
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,512,1,0,1.7346
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,128,1,0,0.7617
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,256,1,0,0.9820
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1,1,0,1.2278
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,4,1,0,1.3026
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,512,1,0,1.7230
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,8,1,0,1.4154
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,16,1,0,1.6552
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1024,1,0,3.3553
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,32,1,0,2.1184
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1024,1,0,3.3825
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,64,1,0,3.3762
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,2048,1,0,7.6566
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,2048,1,0,7.7373
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,128,1,0,6.0409
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,2048,1,0,63.2136
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1,1,0,1.1729
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,4,1,0,1.2331
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,8,1,0,1.3481
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,16,1,0,1.6429
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,32,1,0,2.0567
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,256,1,0,12.0100
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,64,1,0,3.3870
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4096,1,0,19.4854
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1,1,0,0.6532
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4096,1,0,19.5086
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4,1,0,0.6421
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,8,1,0,0.6523
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1,1,0,0.6685
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,16,1,0,0.6743
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,128,1,0,6.0522
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,32,1,0,0.7073
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,64,1,0,0.8339
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,128,1,0,1.0245
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,256,1,0,1.7088
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4,1,0,0.6870
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,512,1,0,3.1779
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,8,1,0,0.6716
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,16,1,0,0.6874
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,32,1,0,0.7485
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,64,1,0,0.8409
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,256,1,0,12.1227
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,128,1,0,1.0188
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,256,1,0,1.7035
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1024,1,0,6.7048
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,512,1,0,25.2400
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,512,1,0,3.2371
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1024,1,0,6.8124
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,2048,1,0,15.5826
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,512,1,0,25.2798
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,2048,1,0,15.9152
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4096,1,0,39.0768
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1024,1,0,55.3808
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1,1,0,0.7867
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,4,1,0,0.7872
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,1,1,0,2.0005
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,8,1,0,0.7955
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,16,1,0,0.8135
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,32,1,0,0.9214
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,4,1,0,2.1170
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,64,1,0,1.1041
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,8,1,0,2.3163
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4096,1,0,40.4842
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,128,1,0,1.7444
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1,1,0,0.7757
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,16,1,0,2.7893
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,4,1,0,0.7987
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,256,1,0,3.1431
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,8,1,0,0.8112
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,16,1,0,0.8651
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,32,1,0,0.9665
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,32,1,0,4.0328
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,64,1,0,1.1337
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,128,1,0,1.7536
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,512,1,0,6.2995
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,256,1,0,3.1299
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1024,1,0,55.9032
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,64,1,0,6.6206
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,1,1,0,1.8712
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,4,1,0,1.9554
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,512,1,0,6.3206
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,8,1,0,2.1739
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1024,1,0,13.5017
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,16,1,0,2.6772
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,128,1,0,12.0275
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,32,1,0,3.9390
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1024,1,0,13.7721
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,64,1,0,6.5215
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,128,1,0,12.0944
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,256,1,0,24.0587
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,2048,1,0,30.8163
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1,1,0,0.9493
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,4,1,0,0.9737
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,8,1,0,1.0080
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,16,1,0,1.1352
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,32,1,0,1.3160
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,2048,1,0,31.2202
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,64,1,0,1.9171
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1,1,0,0.9912
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,4,1,0,1.0524
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,128,1,0,3.2150
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,8,1,0,1.1002
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,16,1,0,1.1914
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,32,1,0,1.3529
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,256,1,0,24.4256
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,64,1,0,1.9421
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,256,1,0,6.1496
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,128,1,0,3.2067
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,256,1,0,6.2966
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,512,1,0,12.7314
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,512,1,0,12.9581
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,512,1,0,52.8415
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,1,1,0,3.2677
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,4,1,0,3.6440
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,8,1,0,4.0951
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1024,1,0,27.2872
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,1,1,0,1.3698
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,16,1,0,5.3318
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,4,1,0,1.4380
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,8,1,0,1.5365
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,16,1,0,1.7017
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,512,1,0,53.1381
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1024,1,0,27.7054
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,32,1,0,2.3119
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,1,1,0,1.4595
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,32,1,0,7.8459
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,1,1,0,3.3589
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,4,1,0,1.5290
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,64,1,0,3.5428
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,8,1,0,1.6479
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,4,1,0,3.3995
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,128,1,0,6.2843
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,8,1,0,3.8569
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,16,1,0,1.8031
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,64,1,0,13.1816
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,32,1,0,2.3897
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,16,1,0,5.0911
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,64,1,0,3.6075
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,256,1,0,12.3975
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,32,1,0,7.6001
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,128,1,0,6.3268
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,128,1,0,23.9733
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,64,1,0,13.0429
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,256,1,0,12.4762
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,512,1,0,25.6569
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,1,1,0,2.2297
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,4,1,0,2.3846
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,8,1,0,2.5377
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,128,1,0,23.9367
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,16,1,0,3.1386
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,512,1,0,25.7341
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,32,1,0,4.3358
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,1,1,0,2.3219
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,4,1,0,2.5083
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,8,1,0,2.6468
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,64,1,0,6.9161
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,16,1,0,3.2210
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,32,1,0,4.4385
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,256,1,0,49.8068
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,128,1,0,12.5101
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,64,1,0,7.0080
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,1,1,0,5.9604
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,4,1,0,6.7892
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,128,1,0,12.6056
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,8,1,0,8.0570
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,256,1,0,50.1695
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,256,1,0,24.8924
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,1,1,0,5.8633
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,1,1,0,3.9340
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,16,1,0,10.5079
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,4,1,0,4.1359
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,4,1,0,6.2734
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,256,1,0,24.8801
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,8,1,0,4.7470
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,1,1,0,4.1613
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,8,1,0,7.5780
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,32,1,0,15.7200
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,16,1,0,5.9100
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,4,1,0,4.4300
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,8,1,0,5.0087
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,16,1,0,10.1326
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,32,1,0,8.4390
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,16,1,0,6.1803
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,32,1,0,8.6693
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,64,1,0,13.8047
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,64,1,0,26.5586
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,32,1,0,15.1766
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,64,1,0,14.0087
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,128,1,0,25.0442
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,64,1,0,25.9665
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,128,1,0,25.0884
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,128,1,0,49.8859
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,128,1,0,49.3736
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1,1,0,0.6198
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1,1,0,0.6535
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4,1,0,0.6428
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8,1,0,0.6248
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4,1,0,0.9721
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16,1,0,1.0868
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8,1,0,0.5986
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,32,1,0,0.6339
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16,1,0,0.9762
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,32,1,0,0.6024
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,64,1,0,0.9337
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,128,1,0,0.6001
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,256,1,0,0.6012
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,512,1,0,0.6012
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,64,1,0,0.6342
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1024,1,0,0.6079
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,128,1,0,0.6234
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,2048,1,0,0.6801
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,256,1,0,0.6440
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4096,1,0,0.9429
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,512,1,0,0.6390
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1024,1,0,0.6505
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,2048,1,0,0.7067
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8192,1,0,1.8102
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4096,1,0,0.9688
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8192,1,0,1.8383
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16384,1,0,4.9985
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1,1,0,0.5935
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4,1,0,0.5996
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8,1,0,0.5985
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16,1,0,0.5895
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16384,1,0,5.1030
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,32,1,0,0.5932
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1,1,0,0.7794
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,64,1,0,0.6040
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4,1,0,0.6242
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,128,1,0,0.9663
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8,1,0,0.6144
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,256,1,0,0.6086
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16,1,0,0.6392
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,512,1,0,0.6199
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,32,1,0,0.6107
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,64,1,0,0.6176
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,128,1,0,0.6296
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,256,1,0,0.6361
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,512,1,0,0.6510
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1024,1,0,0.6977
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,2048,1,0,0.8554
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4096,1,0,1.4017
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1024,1,0,0.6511
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,2048,1,0,1.0946
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8192,1,0,3.3971
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4096,1,0,1.6329
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8192,1,0,3.3995
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16384,1,0,10.8914
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1,1,0,0.6240
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4,1,0,0.6260
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8,1,0,0.6245
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16,1,0,0.6601
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,32,1,0,0.6335
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16384,1,0,10.5730
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,64,1,0,0.6306
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1,1,0,0.5902
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,128,1,0,0.6314
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4,1,0,0.5977
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,256,1,0,0.6548
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8,1,0,0.6020
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,512,1,0,0.6848
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16,1,0,0.5934
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1024,1,0,0.8026
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,32,1,0,0.5902
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,64,1,0,0.5989
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,2048,1,0,1.1936
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,128,1,0,0.6000
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,256,1,0,0.6113
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,512,1,0,0.6602
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4096,1,0,2.5189
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1024,1,0,0.7658
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,2048,1,0,1.1881
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4096,1,0,2.5087
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8192,1,0,7.0552
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8192,1,0,6.8382
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16384,1,0,21.8135
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16384,1,0,21.1071
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1,1,0,0.6240
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1,1,0,0.6649
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4,1,0,0.6242
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4,1,0,0.5995
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8,1,0,0.6266
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16,1,0,0.6209
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8,1,0,0.8441
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,32,1,0,0.6298
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,64,1,0,0.6266
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16,1,0,0.7606
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,128,1,0,0.6467
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,32,1,0,0.6111
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,256,1,0,0.6815
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,64,1,0,0.6238
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,128,1,0,0.8792
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,512,1,0,0.7672
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,256,1,0,0.8300
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1024,1,0,1.0777
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,512,1,0,0.8908
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1024,1,0,1.2608
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,2048,1,0,2.0879
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,2048,1,0,2.1806
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4096,1,0,4.9000
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4096,1,0,4.8000
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8192,1,0,13.7686
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8192,1,0,13.9705
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16384,1,0,43.4535
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16384,1,0,43.1418
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1,1,0,0.6699
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1,1,0,0.6087
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4,1,0,0.6381
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4,1,0,0.6113
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,8,1,0,0.6352
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,8,1,0,0.6204
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,16,1,0,0.6385
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,16,1,0,0.6165
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,32,1,0,0.6403
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,32,1,0,0.6342
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,64,1,0,0.6651
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,64,1,0,0.6400
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,128,1,0,0.7090
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,128,1,0,0.6838
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,256,1,0,0.7712
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,256,1,0,0.7422
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,512,1,0,1.0386
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,512,1,0,1.0252
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1024,1,0,1.8936
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1024,1,0,1.8710
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,2048,1,0,4.0779
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,2048,1,0,4.0134
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4096,1,0,10.3190
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1,1,0,0.6821
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4096,1,0,9.8811
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4,1,0,0.6802
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1,1,0,1.0546
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,8,1,0,0.6780
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,16,1,0,0.6816
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4,1,0,1.0534
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,32,1,0,0.7135
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,8,1,0,1.0655
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,64,1,0,0.7487
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,128,1,0,0.8204
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,16,1,0,1.0905
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,256,1,0,1.0588
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,32,1,0,1.0810
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,64,1,0,1.0645
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,512,1,0,1.8124
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,128,1,0,0.7751
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,256,1,0,1.0456
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1024,1,0,3.6116
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,512,1,0,1.8004
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1024,1,0,3.5497
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,2048,1,0,8.1163
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,2048,1,0,8.0531
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4096,1,0,20.2834
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1,1,0,0.7139
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4096,1,0,19.9016
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,4,1,0,0.7315
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,8,1,0,0.7205
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1,1,0,0.6969
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,16,1,0,0.7580
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,4,1,0,0.6965
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,32,1,0,0.7925
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,8,1,0,0.7016
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,64,1,0,0.8743
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,16,1,0,0.7127
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,128,1,0,1.0976
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,32,1,0,0.7632
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,64,1,0,0.8313
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,256,1,0,1.7987
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,128,1,0,1.4159
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,512,1,0,3.4170
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,256,1,0,1.7945
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1024,1,0,7.1805
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,512,1,0,3.3740
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1024,1,0,6.9412
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,2048,1,0,16.2710
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1,1,0,0.8540
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,4,1,0,0.8680
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,8,1,0,0.9021
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,16,1,0,0.9604
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,32,1,0,1.0389
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,64,1,0,1.2338
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,128,1,0,1.8853
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,2048,1,0,16.0864
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1,1,0,0.8235
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,256,1,0,3.3965
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,4,1,0,0.8398
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,8,1,0,0.8673
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,16,1,0,0.9164
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,32,1,0,0.9966
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,512,1,0,6.7629
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,64,1,0,1.2390
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,128,1,0,1.9000
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1024,1,0,14.5492
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,256,1,0,3.4039
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,1,1,0,1.1628
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,4,1,0,1.2312
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,8,1,0,1.2811
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,16,1,0,1.3666
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,512,1,0,6.7671
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,32,1,0,1.5617
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,64,1,0,2.1828
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,128,1,0,3.5853
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,256,1,0,6.7514
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1024,1,0,14.4525
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,1,1,0,1.9085
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,4,1,0,1.2055
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,512,1,0,13.7028
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,8,1,0,1.2281
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,16,1,0,1.3059
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,1,1,0,1.8095
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,32,1,0,1.5229
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,4,1,0,1.9151
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,64,1,0,2.1768
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,8,1,0,1.9863
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,16,1,0,2.1836
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,128,1,0,3.5706
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,32,1,0,2.7927
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,64,1,0,4.1370
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,256,1,0,7.1579
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,128,1,0,6.9653
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,512,1,0,13.5013
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,1,1,0,1.7790
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,4,1,0,1.8550
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,256,1,0,13.4802
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,8,1,0,1.9351
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,1,1,0,3.0959
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,16,1,0,2.1295
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,32,1,0,2.7460
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,4,1,0,3.2591
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,64,1,0,4.0921
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,8,1,0,3.4626
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,16,1,0,4.0521
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,128,1,0,6.9820
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,32,1,0,5.4114
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,64,1,0,8.1482
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,256,1,0,13.3036
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,1,1,0,3.0842
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,4,1,0,3.1546
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,8,1,0,3.4130
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,16,1,0,3.9202
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,128,1,0,13.9787
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,32,1,0,5.2664
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,64,1,0,8.0014
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,128,1,0,13.9506
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1,1,0,0.6067
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4,1,0,0.6155
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8,1,0,0.5945
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16,1,0,0.6012
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,32,1,0,0.6080
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,64,1,0,0.6009
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,128,1,0,0.6064
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,256,1,0,0.6024
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,512,1,0,0.6031
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1,1,0,0.6516
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1024,1,0,0.5952
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,2048,1,0,0.6220
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4096,1,0,0.7242
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8192,1,0,1.1733
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16384,1,0,2.6966
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1,1,0,0.7617
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4,1,0,0.5960
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8,1,0,0.6005
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4,1,0,0.6423
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8,1,0,0.6371
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16,1,0,0.6027
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16,1,0,0.6358
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,32,1,0,0.6014
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,32,1,0,0.6467
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,64,1,0,0.6114
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,64,1,0,0.6412
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,128,1,0,0.6083
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,128,1,0,0.6674
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,256,1,0,0.9758
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,256,1,0,0.9858
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,512,1,0,0.9978
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,512,1,0,0.9699
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1024,1,0,0.6375
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1024,1,0,0.7607
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,2048,1,0,0.6399
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,2048,1,0,0.6845
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4096,1,0,0.9640
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4096,1,0,0.7451
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8192,1,0,1.1843
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8192,1,0,1.8977
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16384,1,0,2.6081
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1,1,0,0.6131
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16384,1,0,5.3345
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4,1,0,0.6211
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1,1,0,0.6111
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8,1,0,0.6211
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16,1,0,0.6221
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4,1,0,0.6069
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,32,1,0,0.6219
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8,1,0,0.6208
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,64,1,0,0.6260
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16,1,0,0.6168
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,32,1,0,0.6110
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,128,1,0,0.6704
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,64,1,0,0.6009
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,128,1,0,0.6124
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,256,1,0,0.6179
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,512,1,0,0.6195
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,256,1,0,0.6150
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,512,1,0,0.6382
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1024,1,0,0.6406
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1024,1,0,0.6548
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,2048,1,0,0.6842
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,2048,1,0,0.8491
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4096,1,0,0.9894
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4096,1,0,1.4555
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8192,1,0,1.8487
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8192,1,0,3.6032
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16384,1,0,5.2746
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1,1,0,0.6288
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4,1,0,0.6181
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8,1,0,0.6162
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16,1,0,0.6218
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,32,1,0,0.6222
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,64,1,0,0.6138
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16384,1,0,11.1290
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,128,1,0,0.6238
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,256,1,0,0.6248
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1,1,0,0.6138
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,512,1,0,0.6456
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4,1,0,0.6163
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1024,1,0,0.6715
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,2048,1,0,0.8654
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8,1,0,0.5944
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16,1,0,0.5960
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4096,1,0,1.4341
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,32,1,0,0.6083
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,64,1,0,0.6100
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8192,1,0,3.4976
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,128,1,0,0.6058
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,256,1,0,0.6313
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,512,1,0,0.6644
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1024,1,0,0.7924
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,2048,1,0,1.2217
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16384,1,0,10.7424
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1,1,0,0.6287
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4,1,0,0.6254
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4096,1,0,2.7055
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8,1,0,0.6374
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16,1,0,0.9952
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,32,1,0,1.0110
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,64,1,0,0.9939
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,128,1,0,0.8564
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,256,1,0,0.6465
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,512,1,0,0.6839
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8192,1,0,7.1383
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1024,1,0,0.8221
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,2048,1,0,1.2241
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4096,1,0,2.6845
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8192,1,0,6.9183
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16384,1,0,22.5300
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1,1,0,0.6154
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4,1,0,0.6095
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,8,1,0,0.6220
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,16,1,0,0.6210
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,32,1,0,0.6138
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,64,1,0,0.6167
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,128,1,0,0.8921
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,256,1,0,0.6801
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,512,1,0,0.7699
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1024,1,0,1.1301
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,2048,1,0,2.2812
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16384,1,0,21.5482
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4096,1,0,5.3287
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1,1,0,0.6388
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1,1,0,0.6400
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4,1,0,0.6473
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,8,1,0,0.6358
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4,1,0,0.6315
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,16,1,0,0.6533
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,8,1,0,0.6355
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,32,1,0,0.6446
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,16,1,0,0.6465
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,64,1,0,0.6755
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,32,1,0,0.6530
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,128,1,0,0.6943
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,64,1,0,0.6238
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,256,1,0,0.7912
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,128,1,0,0.6999
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,256,1,0,0.7066
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,512,1,0,1.1047
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,512,1,0,0.7907
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1024,1,0,1.1249
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1024,1,0,2.0834
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,2048,1,0,2.3453
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,2048,1,0,4.4074
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4096,1,0,5.1984
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1,1,0,0.9355
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4096,1,0,10.8268
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4,1,0,1.1149
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1,1,0,0.6991
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,8,1,0,1.1189
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,4,1,0,0.7014
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,16,1,0,1.1260
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,8,1,0,0.7145
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,32,1,0,1.1160
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,16,1,0,0.7079
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,64,1,0,0.7048
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,32,1,0,0.7355
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,128,1,0,0.7307
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,256,1,0,0.8484
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,64,1,0,0.7523
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,128,1,0,0.8331
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,512,1,0,1.1166
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,256,1,0,1.1247
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1024,1,0,2.0991
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,512,1,0,2.0255
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,2048,1,0,4.4603
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1024,1,0,4.0156
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4096,1,0,10.6405
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,2048,1,0,9.0077
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1,1,0,0.8124
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1,1,0,0.7366
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,4,1,0,0.8205
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,4,1,0,0.7392
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,8,1,0,0.8113
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,8,1,0,0.7553
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,16,1,0,0.8257
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,16,1,0,0.7670
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,32,1,0,0.8747
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,32,1,0,0.7700
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,64,1,0,0.9579
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,64,1,0,0.7958
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,128,1,0,1.2242
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,128,1,0,0.8973
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,256,1,0,2.0835
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,256,1,0,1.1369
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,512,1,0,2.0285
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,512,1,0,3.9090
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1024,1,0,4.0220
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1024,1,0,8.1825
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,1,1,0,1.0337
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,2048,1,0,8.7770
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,4,1,0,1.0440
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,8,1,0,1.0843
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1,1,0,0.8316
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,16,1,0,1.1239
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,4,1,0,0.8630
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,32,1,0,1.2275
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,8,1,0,0.8659
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,16,1,0,0.8822
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,32,1,0,0.9193
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,64,1,0,1.4505
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,64,1,0,0.9880
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,128,1,0,1.2322
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,128,1,0,2.2709
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,256,1,0,2.0828
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,512,1,0,3.8918
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,256,1,0,4.0110
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1024,1,0,8.0582
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,1,1,0,1.0479
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,4,1,0,1.0938
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,512,1,0,7.7780
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,8,1,0,1.1180
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,1,1,0,1.5269
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,16,1,0,1.1554
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,32,1,0,1.2640
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,4,1,0,1.5809
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,8,1,0,1.6284
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,64,1,0,1.4801
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,16,1,0,1.7115
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,32,1,0,1.9294
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,128,1,0,2.2703
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,64,1,0,2.7179
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,128,1,0,4.3623
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,256,1,0,4.0669
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,256,1,0,8.0095
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,1,1,0,2.6248
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,512,1,0,7.6450
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,4,1,0,2.6393
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,1,1,0,1.5593
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,8,1,0,2.7986
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,4,1,0,1.6294
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,8,1,0,1.7072
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,16,1,0,2.9177
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,16,1,0,3.1120
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,32,1,0,2.0090
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,32,1,0,3.7394
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,64,1,0,2.7610
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,128,1,0,4.3627
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,64,1,0,5.2309
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,256,1,0,8.7952
SGLang,0.5.10,NVIDIA H200,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,128,1,0,8.7551
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,1,1,0,2.6512
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,4,1,0,2.7058
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,8,1,0,2.8984
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,16,1,0,3.0195
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,32,1,0,3.8093
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,64,1,0,5.3443
SGLang,0.5.10,NVIDIA H200,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,128,1,0,10.4868
