framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1,1,0,0.6505
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1,1,0,0.7191
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1,1,0,0.6853
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1,1,0,0.7179
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1,1,0,0.7119
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1,1,0,0.6600
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4,1,0,0.6886
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4,1,0,0.7238
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8,1,0,0.6776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4,1,0,0.6599
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8,1,0,0.6794
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4,1,0,0.6827
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4,1,0,0.6546
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16,1,0,0.6461
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8,1,0,0.6153
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16,1,0,0.6506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4,1,0,0.6408
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8,1,0,0.6279
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16,1,0,0.6194
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8,1,0,0.6627
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16,1,0,0.6913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,32,1,0,0.6707
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,32,1,0,0.6850
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16,1,0,0.6253
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,32,1,0,0.6516
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,32,1,0,0.7254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,64,1,0,0.6942
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,64,1,0,0.7298
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,32,1,0,0.7200
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,64,1,0,0.6944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,128,1,0,0.7146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,128,1,0,0.7396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,64,1,0,0.7240
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,128,1,0,0.6958
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,256,1,0,0.7220
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,256,1,0,0.7492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,128,1,0,0.7258
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,256,1,0,0.7061
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,512,1,0,0.6972
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,512,1,0,0.7198
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,256,1,0,0.7116
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8,1,0,0.6528
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,512,1,0,0.6783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1024,1,0,0.6840
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1024,1,0,0.6902
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16,1,0,0.7086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,512,1,0,0.7257
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1024,1,0,0.6873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,2048,1,0,0.7630
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,2048,1,0,0.7827
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,2048,1,0,0.6765
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,32,1,0,0.7053
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1024,1,0,0.7390
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4096,1,0,0.9810
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4096,1,0,1.0050
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,2048,1,0,0.8760
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4096,1,0,0.7519
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,64,1,0,0.6699
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,64,1,0,0.7299
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8192,1,0,1.8436
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8192,1,0,1.8329
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8192,1,0,1.1768
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,128,1,0,0.6771
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4096,1,0,1.3738
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,128,1,0,0.6604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,256,1,0,0.6632
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,256,1,0,0.7071
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,512,1,0,0.6813
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16384,1,0,2.6518
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,512,1,0,0.6843
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8192,1,0,3.2824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1,1,0,0.6427
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1024,1,0,0.6913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1024,1,0,0.7485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4,1,0,0.6117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,2048,1,0,0.6901
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16384,1,0,4.9731
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,2048,1,0,0.8944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8,1,0,0.6467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4096,1,0,0.7760
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16,1,0,0.5890
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4096,1,0,1.3585
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,32,1,0,0.5980
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8192,1,0,1.1959
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,64,1,0,0.6178
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,128,1,0,0.5675
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,256,1,0,0.5763
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,512,1,0,0.5690
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8192,1,0,3.2253
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1024,1,0,0.5860
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,2048,1,0,0.6355
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16384,1,0,5.0318
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4096,1,0,0.9099
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1,1,0,0.5796
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16384,1,0,10.3329
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4,1,0,0.5795
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8192,1,0,1.8404
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8,1,0,0.5776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1,1,0,0.5779
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16,1,0,0.6281
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4,1,0,0.6295
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,32,1,0,0.5837
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8,1,0,0.5887
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,64,1,0,0.6176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16,1,0,0.6020
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16384,1,0,2.6171
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,128,1,0,0.5748
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,32,1,0,0.5887
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1,1,0,0.6074
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,256,1,0,0.5855
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,64,1,0,0.5789
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4,1,0,0.6046
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,512,1,0,0.6053
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,128,1,0,0.5960
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8,1,0,0.6075
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16384,1,0,10.3790
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16384,1,0,5.3707
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1024,1,0,0.6636
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16,1,0,0.6056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1,1,0,0.6080
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1,1,0,0.5648
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,2048,1,0,0.7906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4,1,0,0.5617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8,1,0,0.5654
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4096,1,0,1.3689
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16,1,0,0.5644
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,32,1,0,0.5698
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,64,1,0,0.5667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,128,1,0,0.5747
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8192,1,0,3.3579
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,256,1,0,0.5801
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,512,1,0,0.5851
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,256,1,0,0.6133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1,1,0,0.6379
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1024,1,0,0.6296
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,512,1,0,0.6516
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,32,1,0,0.5980
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4,1,0,0.6133
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,2048,1,0,0.8086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1024,1,0,0.7544
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,64,1,0,0.6073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8,1,0,0.6038
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,128,1,0,0.5950
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16,1,0,0.5964
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,2048,1,0,1.1211
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4096,1,0,1.4140
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,256,1,0,0.6063
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,32,1,0,0.5947
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,512,1,0,0.5986
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,64,1,0,0.6030
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1024,1,0,0.6302
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,128,1,0,0.5985
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4096,1,0,2.4145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,2048,1,0,0.6667
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,256,1,0,0.6120
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,512,1,0,0.6296
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4096,1,0,0.9486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8192,1,0,3.5287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1024,1,0,0.6759
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,2048,1,0,0.8165
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8192,1,0,1.8447
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4,1,0,0.6150
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16384,1,0,10.5834
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4096,1,0,1.3565
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8,1,0,0.6100
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1,1,0,0.5823
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16,1,0,0.6109
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4,1,0,0.5831
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,32,1,0,0.5951
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8,1,0,0.5792
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,64,1,0,0.6043
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16,1,0,0.5834
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,128,1,0,0.6032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8192,1,0,3.3405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,32,1,0,0.5857
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,256,1,0,0.6351
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16384,1,0,5.3054
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,512,1,0,0.6729
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1,1,0,0.5903
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1024,1,0,0.7860
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4,1,0,0.6150
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,2048,1,0,1.1390
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8,1,0,0.6080
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16,1,0,0.6157
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,32,1,0,0.6038
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,64,1,0,0.6033
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4096,1,0,2.4269
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8192,1,0,6.4776
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,128,1,0,0.6085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,256,1,0,0.6166
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,512,1,0,0.6273
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,64,1,0,0.5810
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1024,1,0,0.6745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,128,1,0,0.6067
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,256,1,0,0.6012
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,2048,1,0,0.8434
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16384,1,0,10.3913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,512,1,0,0.6579
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1,1,0,0.5920
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4096,1,0,1.4181
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1024,1,0,0.7396
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8192,1,0,6.3504
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4,1,0,0.6073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,2048,1,0,1.1474
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8,1,0,0.6050
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16,1,0,0.6175
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,32,1,0,0.5999
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8192,1,0,3.5033
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,64,1,0,0.6035
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4096,1,0,2.4715
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,128,1,0,0.6065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,256,1,0,0.6345
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,512,1,0,0.6728
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1024,1,0,0.7624
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,2048,1,0,1.1482
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8192,1,0,6.7149
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4096,1,0,2.4515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16384,1,0,20.6006
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1,1,0,0.5916
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4,1,0,0.5897
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8,1,0,0.5951
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8192,1,0,6.6106
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16,1,0,0.5920
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,32,1,0,0.6059
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,64,1,0,0.5882
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,128,1,0,0.6114
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,256,1,0,0.6879
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,512,1,0,0.7253
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1024,1,0,1.0264
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,2048,1,0,1.9799
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16384,1,0,19.9311
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1,1,0,0.6067
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4,1,0,0.6030
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4096,1,0,4.7549
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16384,1,0,20.5458
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1,1,0,0.6012
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4,1,0,0.5937
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8,1,0,0.6107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16,1,0,0.5887
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,32,1,0,0.5891
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,64,1,0,0.6021
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,128,1,0,0.6111
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8,1,0,0.5993
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16384,1,0,20.5819
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16,1,0,0.5956
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1,1,0,0.6198
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,32,1,0,0.6001
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,64,1,0,0.6028
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,128,1,0,0.6215
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,256,1,0,0.6614
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,512,1,0,0.7613
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8192,1,0,13.3449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1024,1,0,1.0295
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,256,1,0,0.6519
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,2048,1,0,2.0003
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,512,1,0,0.7286
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4,1,0,0.6180
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1024,1,0,1.0459
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8,1,0,0.6171
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16,1,0,0.6052
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,32,1,0,0.6030
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,2048,1,0,2.0535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,64,1,0,0.6332
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4096,1,0,4.5532
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,128,1,0,0.6331
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,256,1,0,0.6803
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,512,1,0,0.7410
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1024,1,0,1.0609
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4096,1,0,4.7459
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,2048,1,0,2.0578
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4096,1,0,4.7662
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8192,1,0,13.2295
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8192,1,0,13.7126
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1,1,0,0.5898
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4,1,0,0.5864
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8,1,0,0.5771
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1,1,0,0.6009
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16,1,0,0.5896
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8192,1,0,13.5148
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4,1,0,0.6050
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8,1,0,0.6405
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16,1,0,0.5951
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16384,1,0,40.7671
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1,1,0,0.6003
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4,1,0,0.5933
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8,1,0,0.9545
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16,1,0,0.6098
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,32,1,0,0.6027
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,64,1,0,0.6193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,128,1,0,0.6610
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,256,1,0,0.7581
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,512,1,0,0.9872
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1024,1,0,1.7940
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,2048,1,0,3.7275
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16384,1,0,40.1134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1,1,0,0.6180
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4,1,0,0.6198
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8,1,0,0.6204
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16,1,0,0.6217
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,32,1,0,0.6077
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4096,1,0,9.4995
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,64,1,0,0.6486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,128,1,0,0.6691
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,256,1,0,0.7541
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16384,1,0,42.0350
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,512,1,0,0.9734
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1,1,0,0.6017
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1024,1,0,1.7895
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16384,1,0,41.0262
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1,1,0,0.6283
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,2048,1,0,3.7141
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4,1,0,0.6274
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,8,1,0,0.6278
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,16,1,0,0.6281
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,32,1,0,0.6262
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4,1,0,0.6056
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,64,1,0,0.6359
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,8,1,0,0.6040
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,128,1,0,0.6923
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,256,1,0,0.7510
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,512,1,0,1.0057
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1024,1,0,1.8358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4096,1,0,9.3733
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,16,1,0,0.5950
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,2048,1,0,3.8987
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,32,1,0,0.6249
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,64,1,0,0.6347
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,128,1,0,0.6736
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,256,1,0,0.7299
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8192,1,0,26.1242
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,512,1,0,1.0147
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1024,1,0,1.8478
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4096,1,0,9.7497
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,2048,1,0,3.9413
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1,1,0,0.6466
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4,1,0,0.6590
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,8,1,0,0.6502
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,16,1,0,0.6535
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,32,1,0,0.6675
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,64,1,0,0.7245
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,128,1,0,0.7829
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,256,1,0,1.0293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,512,1,0,1.7719
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4096,1,0,9.8248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1,1,0,0.6407
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8192,1,0,25.7449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4,1,0,0.6347
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1024,1,0,3.4836
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,8,1,0,0.6293
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,2048,1,0,8.0027
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16384,1,0,10.6517
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1,1,0,0.5739
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4,1,0,0.5856
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8,1,0,1.0125
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16,1,0,0.5740
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,32,1,0,0.5814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,64,1,0,0.5764
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,128,1,0,0.5862
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,256,1,0,0.6120
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,512,1,0,0.6470
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1024,1,0,0.7536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,2048,1,0,1.1955
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4096,1,0,2.6277
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,16,1,0,0.6390
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,32,1,0,0.6565
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,64,1,0,0.7013
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,128,1,0,0.7694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,256,1,0,1.0384
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4096,1,0,19.5156
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,512,1,0,1.7867
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1,1,0,0.6979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,4,1,0,0.7227
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8192,1,0,7.1248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,8,1,0,0.7009
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16384,1,0,10.4064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,16,1,0,0.7240
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1,1,0,0.6097
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1024,1,0,3.4690
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,32,1,0,0.7922
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4,1,0,0.6077
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,64,1,0,0.8416
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8,1,0,0.6154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16,1,0,0.6134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,128,1,0,1.0702
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,32,1,0,0.6093
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,64,1,0,0.6360
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,256,1,0,1.8244
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,128,1,0,0.6166
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,256,1,0,0.6392
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,512,1,0,0.6714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1024,1,0,0.7877
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,512,1,0,3.3171
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,2048,1,0,7.8878
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,2048,1,0,1.2031
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4096,1,0,2.6316
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1024,1,0,6.9442
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8192,1,0,6.8871
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16384,1,0,21.3871
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1,1,0,0.6069
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4,1,0,0.6271
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,8,1,0,0.6176
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,16,1,0,0.6107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,32,1,0,0.6203
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,64,1,0,0.6303
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,128,1,0,0.6444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,256,1,0,0.6707
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,512,1,0,0.7830
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4096,1,0,19.5072
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1024,1,0,1.1177
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1,1,0,0.7045
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16384,1,0,83.7722
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,4,1,0,0.6968
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,2048,1,0,2.2222
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1,1,0,0.6106
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,2048,1,0,15.9628
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,8,1,0,0.7008
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4,1,0,0.6107
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1,1,0,0.8587
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,8,1,0,0.6211
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,16,1,0,0.6233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,4,1,0,0.8581
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,32,1,0,0.6320
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,8,1,0,0.8879
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,64,1,0,0.6741
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,16,1,0,0.9323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,128,1,0,0.7599
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4096,1,0,5.2123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,32,1,0,1.0159
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,256,1,0,0.9914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1,1,0,0.6455
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4,1,0,0.6597
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,64,1,0,1.2201
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,512,1,0,1.7329
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,8,1,0,0.6448
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,16,1,0,0.7432
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16384,1,0,21.1130
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,16,1,0,0.6485
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,32,1,0,0.7571
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1,1,0,0.6355
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,128,1,0,1.9126
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,64,1,0,0.8327
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,32,1,0,0.6593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,64,1,0,0.6858
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,128,1,0,1.1285
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1024,1,0,3.3187
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,128,1,0,0.7653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,256,1,0,3.3868
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,256,1,0,0.8693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,256,1,0,1.8379
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16384,1,0,82.0536
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,512,1,0,1.1044
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1,1,0,0.6375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4,1,0,0.6301
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1024,1,0,2.0557
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,8,1,0,0.6263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4,1,0,0.6248
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,16,1,0,0.6311
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,8,1,0,0.6298
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,32,1,0,0.6449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,2048,1,0,7.5936
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,16,1,0,0.6232
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,64,1,0,0.6898
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,32,1,0,0.6291
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,512,1,0,6.5572
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,128,1,0,0.7749
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,64,1,0,0.6355
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,256,1,0,0.9876
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,128,1,0,0.6542
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,2048,1,0,4.3374
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,256,1,0,0.6940
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,512,1,0,0.7703
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,512,1,0,1.6950
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,512,1,0,3.3722
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1024,1,0,1.1189
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1024,1,0,3.3167
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,2048,1,0,2.2360
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1024,1,0,6.9255
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4096,1,0,5.1341
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4096,1,0,10.4441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1,1,0,0.6499
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1,1,0,0.7115
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4,1,0,0.6676
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1024,1,0,14.0525
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,8,1,0,0.6611
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,4,1,0,0.7650
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,16,1,0,0.6621
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,1,1,0,1.1783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,8,1,0,0.7475
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,32,1,0,0.6576
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,16,1,0,0.7311
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,64,1,0,0.6750
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4096,1,0,19.0335
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,4,1,0,1.2174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,32,1,0,0.7360
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,128,1,0,0.7299
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1,1,0,0.6855
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,64,1,0,0.7594
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,256,1,0,0.8086
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,8,1,0,1.2841
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4,1,0,0.6508
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,128,1,0,0.9166
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,512,1,0,1.0966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,8,1,0,0.6586
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,16,1,0,1.3252
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,2048,1,0,7.7805
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,16,1,0,0.6709
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,32,1,0,0.7008
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,32,1,0,1.5249
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,64,1,0,0.7906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,128,1,0,1.0198
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,64,1,0,2.1607
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,256,1,0,1.7533
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,2048,1,0,15.8888
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,256,1,0,1.1653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1,1,0,0.8169
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,128,1,0,3.5675
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,512,1,0,3.2048
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1024,1,0,2.0460
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,4,1,0,0.8271
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,512,1,0,2.0138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,8,1,0,0.8477
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,16,1,0,0.9055
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,32,1,0,1.0083
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,2048,1,0,4.3062
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,64,1,0,1.2339
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1024,1,0,3.9549
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,256,1,0,6.6032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1024,1,0,6.4724
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,128,1,0,1.9426
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4096,1,0,19.1150
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1,1,0,0.6680
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4,1,0,0.6479
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,256,1,0,3.4812
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,8,1,0,0.6560
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,16,1,0,0.6718
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,32,1,0,0.7155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,2048,1,0,8.6810
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4096,1,0,10.3757
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1,1,0,0.8793
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1,1,0,0.7103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,4,1,0,0.7515
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,4,1,0,0.8780
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,8,1,0,0.7055
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,512,1,0,6.6373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,8,1,0,0.8761
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,16,1,0,0.7145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,512,1,0,13.3441
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,16,1,0,0.8870
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,32,1,0,0.7621
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,32,1,0,0.9143
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,64,1,0,0.7645
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,2048,1,0,15.3085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,64,1,0,0.8097
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,128,1,0,0.8560
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,64,1,0,0.9979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,128,1,0,1.0224
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,256,1,0,1.1552
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,128,1,0,1.2783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,256,1,0,1.7120
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,512,1,0,1.9705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,256,1,0,2.1168
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,512,1,0,3.1305
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,1,1,0,1.8127
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1024,1,0,3.8801
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,512,1,0,3.8852
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,4,1,0,1.8963
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1024,1,0,14.1889
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,8,1,0,2.0287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,1,1,0,1.1552
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1024,1,0,6.6659
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,4,1,0,1.1778
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,16,1,0,2.1596
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,8,1,0,1.2210
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,2048,1,0,8.6846
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1024,1,0,7.8302
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,16,1,0,1.2943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,32,1,0,2.7717
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1,1,0,0.8221
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,1,1,0,1.0913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,32,1,0,1.5174
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,4,1,0,0.8337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,4,1,0,1.0790
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,8,1,0,0.8308
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,16,1,0,0.8575
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,8,1,0,1.1144
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,64,1,0,4.1484
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,32,1,0,0.8793
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,16,1,0,1.1428
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,64,1,0,0.9741
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,32,1,0,1.2608
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,128,1,0,1.2444
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,64,1,0,1.4989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,2048,1,0,14.9743
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,256,1,0,2.0894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,64,1,0,2.1830
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,128,1,0,2.3159
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,128,1,0,6.9668
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,512,1,0,3.8193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,128,1,0,3.6390
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,256,1,0,4.0730
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4096,1,0,38.7758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1,1,0,0.7978
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,4,1,0,0.7520
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,256,1,0,6.7758
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1024,1,0,7.7605
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,1,1,0,1.0628
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,512,1,0,7.6199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,4,1,0,1.0861
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,256,1,0,13.2913
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,8,1,0,1.1065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,1,1,0,1.6162
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,16,1,0,1.1514
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,8,1,0,0.7553
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,4,1,0,1.6957
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,1,1,0,3.1235
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,32,1,0,1.2492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,16,1,0,0.8110
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,32,1,0,0.8845
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,8,1,0,1.7251
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,64,1,0,1.4804
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,64,1,0,1.0844
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,16,1,0,1.8145
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,4,1,0,3.2377
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,128,1,0,2.2831
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,128,1,0,1.7997
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,512,1,0,13.5123
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,32,1,0,2.0317
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,256,1,0,3.2185
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,8,1,0,3.4630
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,1,1,0,1.8064
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,256,1,0,4.0120
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,64,1,0,2.7626
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,4,1,0,1.8853
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,16,1,0,4.0192
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,8,1,0,1.9275
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,512,1,0,6.3465
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,128,1,0,4.4282
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4096,1,0,37.7007
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,16,1,0,2.1300
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1,1,0,0.7705
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,512,1,0,7.4769
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,4,1,0,0.7983
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,8,1,0,0.8022
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,32,1,0,5.3129
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,32,1,0,2.7584
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,1,1,0,1.6057
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,16,1,0,0.8676
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,32,1,0,0.9817
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,4,1,0,1.7492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,64,1,0,1.1759
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,256,1,0,7.9531
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,128,1,0,1.8640
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,8,1,0,1.7651
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,16,1,0,1.7937
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1024,1,0,13.4146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,256,1,0,3.7941
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,64,1,0,7.9745
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,32,1,0,2.0219
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,64,1,0,4.1177
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,64,1,0,2.8449
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,512,1,0,6.2412
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,1,1,0,2.7035
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,128,1,0,4.4155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,4,1,0,2.7683
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,128,1,0,7.0818
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,8,1,0,2.8497
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,128,1,0,13.9148
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,256,1,0,7.8262
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,16,1,0,3.0254
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1024,1,0,13.2726
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,1,1,0,2.6879
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,32,1,0,3.8227
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,256,1,0,13.6693
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,4,1,0,2.8155
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,2048,1,0,30.4132
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,1,1,0,3.0938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,8,1,0,2.9370
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1,1,0,0.9894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,4,1,0,0.9835
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,8,1,0,1.0370
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,4,1,0,3.1853
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,16,1,0,3.1038
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,16,1,0,1.1368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,32,1,0,1.3037
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,64,1,0,5.2786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,8,1,0,3.3911
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,64,1,0,1.9246
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,32,1,0,3.7900
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,128,1,0,3.3213
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,16,1,0,4.0134
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,64,1,0,5.3467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,128,1,0,8.6604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,2048,1,0,30.0891
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,256,1,0,6.2985
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,32,1,0,5.2259
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1,1,0,0.9824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,4,1,0,1.0113
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,8,1,0,1.0670
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,16,1,0,1.1445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,32,1,0,1.3032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,128,1,0,8.6510
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,64,1,0,1.9210
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,64,1,0,8.0236
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,128,1,0,3.2522
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,512,1,0,12.8408
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,256,1,0,6.2706
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,128,1,0,14.3025
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,512,1,0,12.4473
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1024,1,0,27.2791
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,1,1,0,1.4369
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,4,1,0,1.4539
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,8,1,0,1.5594
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,16,1,0,1.7148
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,32,1,0,2.2950
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,64,1,0,3.5906
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1024,1,0,26.5588
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,1,1,0,1.4605
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,128,1,0,6.4109
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,4,1,0,1.6128
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,8,1,0,1.6144
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,16,1,0,1.7732
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,32,1,0,2.3380
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,64,1,0,3.5653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,256,1,0,12.6979
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,128,1,0,6.4633
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,256,1,0,12.3682
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,32,1,0,0.5987
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,32,1,0,0.6018
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,512,1,0,26.3529
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,1,1,0,2.2492
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,4,1,0,2.3452
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,8,1,0,2.6788
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,16,1,0,3.1684
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,512,1,0,24.9699
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,1,1,0,2.3218
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,32,1,0,4.3369
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,4,1,0,2.4953
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,8,1,0,2.9304
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,64,1,0,7.0227
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,16,1,0,3.2067
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,32,1,0,4.3990
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,128,1,0,12.7141
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,64,1,0,6.8967
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,128,1,0,12.6426
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,256,1,0,25.5995
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,1,1,0,4.1716
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,256,1,0,24.6944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,1,1,0,4.1914
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,4,1,0,4.8469
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,4,1,0,4.4797
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,8,1,0,4.7725
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,8,1,0,5.0786
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,16,1,0,5.9822
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,16,1,0,6.3737
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,32,1,0,8.5146
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,32,1,0,8.7183
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,64,1,0,14.6799
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,64,1,0,14.0136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,128,1,0,25.5802
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,128,1,0,25.2152
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,64,1,0,0.5904
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,64,1,0,0.5887
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,128,1,0,0.6010
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,128,1,0,0.9088
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,256,1,0,0.6516
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,256,1,0,0.8841
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,512,1,0,0.6353
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1024,1,0,0.8622
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,2048,1,0,1.1574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4096,1,0,2.3373
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,512,1,0,0.6230
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1024,1,0,0.7301
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,2048,1,0,1.1574
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4096,1,0,2.4836
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8192,1,0,6.3943
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8192,1,0,6.6217
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16384,1,0,20.3281
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1,1,0,0.6297
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4,1,0,0.6112
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16384,1,0,21.0227
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8,1,0,0.6481
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16,1,0,0.5966
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1,1,0,0.7368
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,32,1,0,0.5869
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,64,1,0,0.7083
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4,1,0,0.5801
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8,1,0,0.5714
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,128,1,0,0.7358
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,256,1,0,0.6409
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16,1,0,0.8789
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,512,1,0,0.7219
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,32,1,0,0.5728
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,64,1,0,0.5684
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1024,1,0,1.0517
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,128,1,0,0.5741
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,256,1,0,0.6204
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,2048,1,0,1.9375
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4096,1,0,4.5199
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,512,1,0,0.6988
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1024,1,0,1.2617
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8192,1,0,13.0700
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,2048,1,0,1.9592
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4096,1,0,4.6386
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8192,1,0,13.6013
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16384,1,0,39.7670
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1,1,0,0.5973
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4,1,0,0.9263
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8,1,0,0.9191
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16,1,0,0.9245
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,32,1,0,0.9233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,64,1,0,0.5955
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16384,1,0,40.9193
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,128,1,0,0.6268
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1,1,0,0.5720
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,256,1,0,0.7302
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,512,1,0,1.0149
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4,1,0,0.6233
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8,1,0,0.7081
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1024,1,0,1.7488
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16,1,0,0.7241
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,32,1,0,0.7365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,64,1,0,0.6310
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,128,1,0,0.6287
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,2048,1,0,3.6610
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,256,1,0,0.7136
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,512,1,0,1.0198
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1024,1,0,1.7404
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4096,1,0,9.1271
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,2048,1,0,3.7506
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4096,1,0,9.4989
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8192,1,0,25.6482
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8192,1,0,25.9894
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16384,1,0,83.8332
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1,1,0,0.7570
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4,1,0,0.5974
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8,1,0,0.5971
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16,1,0,0.6001
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,32,1,0,0.6027
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,64,1,0,0.6414
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,128,1,0,0.7284
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16384,1,0,86.5567
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,256,1,0,1.0032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1,1,0,0.7604
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,512,1,0,1.6919
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4,1,0,0.5951
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8,1,0,0.5862
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16,1,0,0.5874
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1024,1,0,3.2228
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,32,1,0,0.5918
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,64,1,0,0.6377
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,128,1,0,0.7127
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,256,1,0,1.0126
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,2048,1,0,7.2741
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,512,1,0,1.6732
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1024,1,0,3.2753
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4096,1,0,18.7299
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,2048,1,0,7.3365
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4096,1,0,19.1977
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8192,1,0,51.7302
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8192,1,0,52.8069
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16384,1,0,166.9808
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1,1,0,0.9810
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4,1,0,0.9650
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,8,1,0,0.9603
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,16,1,0,0.6095
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,32,1,0,0.6508
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,64,1,0,0.7508
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,128,1,0,1.0109
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,256,1,0,1.6315
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,512,1,0,3.0684
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1024,1,0,6.4638
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16384,1,0,176.3135
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,2048,1,0,15.0117
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1,1,0,0.7924
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4,1,0,0.6029
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,8,1,0,0.6154
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,16,1,0,0.6085
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,32,1,0,0.6488
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,64,1,0,0.8103
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,128,1,0,1.0653
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,256,1,0,1.6712
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,512,1,0,3.1386
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4096,1,0,36.8291
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1,1,0,0.6629
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1024,1,0,6.4096
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4,1,0,0.6823
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,8,1,0,0.6873
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,16,1,0,0.9045
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,32,1,0,0.8476
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,64,1,0,1.1156
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,128,1,0,1.6805
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,2048,1,0,15.1814
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,256,1,0,3.0486
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,512,1,0,5.9633
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1024,1,0,13.0949
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4096,1,0,37.8371
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1,1,0,0.6867
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4,1,0,0.8175
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,8,1,0,0.8188
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,16,1,0,0.7553
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,32,1,0,0.7945
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,64,1,0,1.0694
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,128,1,0,1.6808
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,256,1,0,3.1593
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,2048,1,0,29.4625
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,512,1,0,6.2830
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1024,1,0,13.5713
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,2048,1,0,30.5323
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4096,1,0,76.3905
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1,1,0,0.8821
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,4,1,0,0.8991
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,8,1,0,0.9215
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,16,1,0,1.0159
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,32,1,0,1.3537
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,64,1,0,1.7839
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,128,1,0,3.1221
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,256,1,0,5.9057
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,512,1,0,12.0475
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4096,1,0,80.4938
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1024,1,0,25.8124
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1,1,0,1.3438
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,4,1,0,0.8915
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,8,1,0,0.9070
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,16,1,0,1.0427
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,32,1,0,1.2934
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,64,1,0,2.0094
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,128,1,0,3.3770
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,256,1,0,6.1065
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,512,1,0,12.8272
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,2048,1,0,59.7421
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1024,1,0,26.8606
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1,1,0,1.1965
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,4,1,0,1.7609
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,8,1,0,1.8395
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,16,1,0,1.8773
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,32,1,0,2.0740
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,64,1,0,3.3370
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,128,1,0,5.9629
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,256,1,0,11.8981
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,2048,1,0,61.9982
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1,1,0,1.2737
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,512,1,0,24.0562
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,4,1,0,1.1884
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,8,1,0,1.2783
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,16,1,0,1.5068
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,32,1,0,2.0337
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,64,1,0,3.3279
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,128,1,0,6.5035
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,256,1,0,12.4142
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1024,1,0,51.9885
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,1,1,0,2.6430
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,4,1,0,2.8318
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,8,1,0,3.0338
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,512,1,0,25.9315
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,16,1,0,3.4621
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,32,1,0,4.5061
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,64,1,0,7.1494
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,128,1,0,11.9520
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1024,1,0,55.8249
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,1,1,0,2.5024
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,256,1,0,24.0929
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,4,1,0,1.8851
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,8,1,0,2.1016
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,16,1,0,2.7302
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,32,1,0,4.4459
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,64,1,0,6.5138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,512,1,0,48.7389
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,128,1,0,12.2750
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,1,1,0,3.1699
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,4,1,0,3.6970
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,8,1,0,4.0944
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,16,1,0,5.2907
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,256,1,0,25.2507
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,32,1,0,7.6032
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,64,1,0,13.6887
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,128,1,0,23.9519
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,512,1,0,53.0598
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,1,1,0,3.2004
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,4,1,0,3.3206
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,8,1,0,3.8610
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,16,1,0,5.2591
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,32,1,0,7.6073
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,256,1,0,48.8445
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,1,1,0,5.8982
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,64,1,0,12.9928
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,4,1,0,7.7467
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,8,1,0,7.9696
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,16,1,0,10.4545
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,128,1,0,24.8635
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,32,1,0,15.2949
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,64,1,0,26.1524
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,256,1,0,50.8750
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,1,1,0,5.8948
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,4,1,0,6.3824
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,128,1,0,48.9744
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,8,1,0,8.3687
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,16,1,0,11.4677
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,32,1,0,15.3138
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,64,1,0,26.3668
SGLang,0.5.9,NVIDIA H100 80GB HBM3,mla_context,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,128,1,0,51.0790
