framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1,1,0,0.0915
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4,1,0,0.0915
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8,1,0,0.0913
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16,1,0,0.0915
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1,1,0,0.0638
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1,1,0,0.0476
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,32,1,0,0.0999
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4,1,0,0.0638
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,64,1,0,0.0988
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4,1,0,0.0473
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8,1,0,0.0639
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1,1,0,0.0378
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8,1,0,0.0488
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16,1,0,0.0641
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4,1,0,0.0378
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16,1,0,0.0477
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,32,1,0,0.0643
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8,1,0,0.0377
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,32,1,0,0.0525
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,64,1,0,0.0653
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16,1,0,0.0380
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,64,1,0,0.0515
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,32,1,0,0.0379
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,128,1,0,0.0514
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,64,1,0,0.0395
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,256,1,0,0.0505
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,128,1,0,0.0411
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1,1,0,0.0638
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,256,1,0,0.0406
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4,1,0,0.0639
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1,1,0,0.0928
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,512,1,0,0.0422
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8,1,0,0.0642
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1024,1,0,0.0428
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16,1,0,0.0644
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,2048,1,0,0.0435
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,32,1,0,0.0728
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4096,1,0,0.0455
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,64,1,0,0.0703
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1,1,0,0.0356
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8192,1,0,0.0485
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,128,1,0,0.0689
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,128,1,0,0.0975
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4,1,0,0.0357
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,256,1,0,0.0691
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16384,1,0,0.0553
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,256,1,0,0.0969
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8,1,0,0.0356
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,512,1,0,0.0964
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16,1,0,0.0359
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1024,1,0,0.0966
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,32,1,0,0.0380
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,2048,1,0,0.0983
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4096,1,0,0.1017
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8192,1,0,0.1062
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16384,1,0,0.1150
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1,1,0,0.0483
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4,1,0,0.0483
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8,1,0,0.0484
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16,1,0,0.0484
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,32,1,0,0.0485
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,64,1,0,0.0502
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,512,1,0,0.0684
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,128,1,0,0.0524
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1024,1,0,0.0731
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,256,1,0,0.0526
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,2048,1,0,0.0731
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,512,1,0,0.0551
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4096,1,0,0.0749
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1024,1,0,0.0557
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8192,1,0,0.0788
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,512,1,0,0.0527
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,2048,1,0,0.0561
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16384,1,0,0.0859
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,1024,1,0,0.0566
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4096,1,0,0.0590
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,2048,1,0,0.0567
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8192,1,0,0.0624
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4,1,0,0.0927
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,4096,1,0,0.0592
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16384,1,0,0.0669
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8,1,0,0.0924
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16,1,0,0.0930
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,32,1,0,0.0927
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,64,1,0,0.0941
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,128,1,0,0.0963
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,128,1,0,0.0669
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,256,1,0,0.0982
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,256,1,0,0.0700
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,512,1,0,0.0724
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,1024,1,0,0.0716
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,2048,1,0,0.0726
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,4096,1,0,0.0750
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,8192,1,0,0.0636
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,8192,1,0,0.0775
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1,16384,1,0,0.0709
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1,16384,1,0,0.0835
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,64,1,0,0.0373
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,128,1,0,0.0370
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,512,1,0,0.1004
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,256,1,0,0.0384
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,1024,1,0,0.1008
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,512,1,0,0.0401
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,2048,1,0,0.1024
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,1024,1,0,0.0443
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,2048,1,0,0.0443
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1,1,0,0.0382
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,4096,1,0,0.0462
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4,1,0,0.0379
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,8192,1,0,0.0502
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8,1,0,0.0381
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1,16384,1,0,0.0593
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16,1,0,0.0382
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,32,1,0,0.0382
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,64,1,0,0.0397
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,128,1,0,0.0413
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1,1,0,0.0942
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4,1,0,0.0947
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8,1,0,0.0945
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16,1,0,0.0949
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,32,1,0,0.1050
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,4096,1,0,0.1036
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,64,1,0,0.1022
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,8192,1,0,0.1087
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,128,1,0,0.1006
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1,16384,1,0,0.1171
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,256,1,0,0.1008
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,512,1,0,0.1008
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1024,1,0,0.1020
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,2048,1,0,0.1047
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4096,1,0,0.1059
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,256,1,0,0.0409
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8192,1,0,0.1105
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,512,1,0,0.0427
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16384,1,0,0.1183
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1024,1,0,0.0439
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1,1,0,0.0485
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,2048,1,0,0.0441
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4,1,0,0.0484
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4096,1,0,0.0465
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8,1,0,0.0488
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8192,1,0,0.0524
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16,1,0,0.0486
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,32,1,0,0.0490
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,64,1,0,0.0503
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,128,1,0,0.0520
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,256,1,0,0.0526
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,512,1,0,0.0553
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1024,1,0,0.0558
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,2048,1,0,0.0572
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1,1,0,0.0640
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4096,1,0,0.0595
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4,1,0,0.0645
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8192,1,0,0.0629
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8,1,0,0.0645
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1,1,0,0.0990
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16384,1,0,0.0699
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4,1,0,0.0980
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1,1,0,0.0500
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4,1,0,0.0492
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8,1,0,0.0939
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1,1,0,0.0473
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8,1,0,0.0499
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16,1,0,0.0986
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4,1,0,0.0467
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1,1,0,0.0641
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16384,1,0,0.0599
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,32,1,0,0.1093
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8,1,0,0.0473
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4,1,0,0.0640
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1,1,0,0.0387
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16,1,0,0.0475
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,64,1,0,0.1064
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8,1,0,0.0643
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4,1,0,0.0387
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,32,1,0,0.0519
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,128,1,0,0.1053
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16,1,0,0.0641
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8,1,0,0.0389
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,64,1,0,0.0508
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,32,1,0,0.0640
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,256,1,0,0.1050
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16,1,0,0.0387
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,128,1,0,0.0501
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,64,1,0,0.0661
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,32,1,0,0.0390
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,256,1,0,0.0497
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,64,1,0,0.0402
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,512,1,0,0.0529
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,128,1,0,0.0420
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,1024,1,0,0.0528
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,256,1,0,0.0418
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,2048,1,0,0.0551
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,512,1,0,0.0434
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,4096,1,0,0.0581
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1024,1,0,0.0442
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,8192,1,0,0.0634
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,2048,1,0,0.0458
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1,1,0,0.0357
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,2,16384,1,0,0.0732
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4096,1,0,0.0512
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4,1,0,0.0357
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1,1,0,0.0483
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8,1,0,0.0358
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16,1,0,0.0496
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4,1,0,0.0479
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16,1,0,0.0360
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,32,1,0,0.0503
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8,1,0,0.0480
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,32,1,0,0.0383
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,64,1,0,0.0515
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16,1,0,0.0489
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,128,1,0,0.0533
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,32,1,0,0.0530
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,256,1,0,0.0543
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,64,1,0,0.0526
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,512,1,0,0.1071
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,512,1,0,0.0570
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,128,1,0,0.0525
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,128,1,0,0.0672
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1024,1,0,0.0579
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,256,1,0,0.0695
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,2048,1,0,0.0601
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,512,1,0,0.0710
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4096,1,0,0.0636
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16,1,0,0.0646
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8192,1,0,0.0697
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,32,1,0,0.0733
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1,1,0,0.0971
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,64,1,0,0.0713
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4,1,0,0.0968
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16384,1,0,0.0779
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8192,1,0,0.0593
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,128,1,0,0.0694
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8,1,0,0.0971
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,256,1,0,0.0690
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16,1,0,0.0974
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16384,1,0,0.0684
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,64,1,0,0.0378
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,32,1,0,0.0973
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1,1,0,0.0371
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,128,1,0,0.0376
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,64,1,0,0.0984
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,256,1,0,0.0387
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,128,1,0,0.1001
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,512,1,0,0.0408
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,256,1,0,0.1028
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,256,1,0,0.0521
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,512,1,0,0.1048
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,512,1,0,0.0523
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,1024,1,0,0.1050
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,1024,1,0,0.0544
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1024,1,0,0.0714
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,2048,1,0,0.0578
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,2048,1,0,0.0728
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4096,1,0,0.0768
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8192,1,0,0.0816
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1,1,0,0.0473
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16384,1,0,0.0900
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4,1,0,0.0474
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,512,1,0,0.0691
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,1024,1,0,0.0691
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4,1,0,0.0372
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,2048,1,0,0.0714
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8,1,0,0.0371
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,4096,1,0,0.0744
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1024,1,0,0.1099
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,1024,1,0,0.0408
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,8192,1,0,0.0793
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,2048,1,0,0.1104
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,2048,1,0,0.0427
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,2,16384,1,0,0.0898
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,4096,1,0,0.0462
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4096,1,0,0.1165
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1,1,0,0.0642
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,8192,1,0,0.0519
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,2048,1,0,0.1081
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8192,1,0,0.1223
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4,1,0,0.0638
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,2,16384,1,0,0.0663
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,4096,1,0,0.1132
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8,1,0,0.0636
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1,1,0,0.0365
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16384,1,0,0.1338
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,8192,1,0,0.1209
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4,1,0,0.0364
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1,1,0,0.0945
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8,1,0,0.0366
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,2,16384,1,0,0.1285
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4,1,0,0.0946
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16,1,0,0.0367
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1,1,0,0.0959
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8,1,0,0.0950
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1,1,0,0.0657
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4,1,0,0.0990
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16,1,0,0.0948
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8,1,0,0.1003
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,32,1,0,0.1063
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16,1,0,0.0998
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16,1,0,0.0369
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,64,1,0,0.1033
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,32,1,0,0.0995
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,32,1,0,0.0371
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,128,1,0,0.1028
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,64,1,0,0.1015
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,64,1,0,0.0387
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,256,1,0,0.1051
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,128,1,0,0.1029
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,128,1,0,0.0403
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,512,1,0,0.1061
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,256,1,0,0.0407
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1024,1,0,0.1077
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,4096,1,0,0.0607
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,2048,1,0,0.1136
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,8192,1,0,0.0672
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4096,1,0,0.1214
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,4,16384,1,0,0.0774
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1,1,0,0.0464
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,32,1,0,0.0397
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8,1,0,0.0478
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4,1,0,0.0463
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,64,1,0,0.0387
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16,1,0,0.0476
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8,1,0,0.0467
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,128,1,0,0.0384
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,32,1,0,0.0476
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16,1,0,0.0467
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,256,1,0,0.0394
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,64,1,0,0.0493
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,32,1,0,0.0515
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,512,1,0,0.0395
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,128,1,0,0.0507
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,64,1,0,0.0509
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,1024,1,0,0.0412
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,256,1,0,0.0537
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,128,1,0,0.0505
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,2048,1,0,0.0438
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,512,1,0,0.0554
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,256,1,0,0.0506
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,4096,1,0,0.0488
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,256,1,0,0.1070
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1024,1,0,0.0574
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,512,1,0,0.0423
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,8192,1,0,0.0595
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,2048,1,0,0.0612
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1024,1,0,0.0434
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16,1,0,0.0642
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,4,16384,1,0,0.0691
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4096,1,0,0.0688
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,2048,1,0,0.0497
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,32,1,0,0.0743
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1,1,0,0.0349
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8192,1,0,0.0772
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4,1,0,0.0350
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8192,1,0,0.1339
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8,1,0,0.0348
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4,1,0,0.0660
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16,1,0,0.0351
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16384,1,0,0.0937
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8,1,0,0.0659
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16384,1,0,0.1616
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,32,1,0,0.0375
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16,1,0,0.0661
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1,1,0,0.0472
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,64,1,0,0.0371
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1,1,0,0.0940
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,32,1,0,0.0660
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4,1,0,0.0475
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,128,1,0,0.0368
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4,1,0,0.0937
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,64,1,0,0.0676
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,8,1,0,0.0477
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,256,1,0,0.0371
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,8,1,0,0.0940
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,128,1,0,0.0692
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,16,1,0,0.0478
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,512,1,0,0.0388
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,512,1,0,0.0529
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,16,1,0,0.0945
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,256,1,0,0.0714
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,512,1,0,0.1086
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,1024,1,0,0.0411
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,1024,1,0,0.0550
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,32,1,0,0.1056
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,512,1,0,0.0734
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,2048,1,0,0.0456
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,1024,1,0,0.1113
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,64,1,0,0.1061
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1024,1,0,0.0740
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4096,1,0,0.0595
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,4096,1,0,0.0579
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,2048,1,0,0.1146
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,2048,1,0,0.0768
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,4096,1,0,0.1194
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8192,1,0,0.0684
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4096,1,0,0.0820
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,8192,1,0,0.0661
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,8192,1,0,0.1277
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16384,1,0,0.0852
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,8,16384,1,0,0.0851
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,4,16384,1,0,0.1387
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1,1,0,0.0350
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1,1,0,0.0369
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1,1,0,0.0969
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4,1,0,0.0349
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4,1,0,0.0370
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4,1,0,0.0973
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,8,1,0,0.0351
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,8,1,0,0.0371
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8,1,0,0.0972
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,16,1,0,0.0371
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,32,1,0,0.0476
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16,1,0,0.0969
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,32,1,0,0.0373
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,64,1,0,0.0495
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,32,1,0,0.0972
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,64,1,0,0.0387
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,128,1,0,0.0505
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,64,1,0,0.0986
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,2048,1,0,0.0566
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,128,1,0,0.0403
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,256,1,0,0.0561
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,64,1,0,0.0709
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,128,1,0,0.1003
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,4096,1,0,0.0665
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,512,1,0,0.0577
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,128,1,0,0.0697
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,256,1,0,0.1058
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1024,1,0,0.0613
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,256,1,0,0.0689
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,512,1,0,0.1076
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8192,1,0,0.0903
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,512,1,0,0.0695
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,2048,1,0,0.0662
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,1024,1,0,0.1101
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,1024,1,0,0.0711
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16384,1,0,0.1009
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4096,1,0,0.0766
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,2048,1,0,0.0737
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,2048,1,0,0.1151
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1,1,0,0.0638
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,4096,1,0,0.0755
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4,1,0,0.0639
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,8192,1,0,0.0931
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,4096,1,0,0.1225
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,8192,1,0,0.0823
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8,1,0,0.0641
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16,1,0,0.0637
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,8192,1,0,0.1348
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,4,16384,1,0,0.0916
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,32,1,0,0.0641
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1,1,0,0.0632
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,64,1,0,0.0653
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4,1,0,0.0635
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,8,16384,1,0,0.1615
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,128,1,0,0.0673
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,128,1,0,0.1075
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8,1,0,0.0631
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,256,1,0,0.0418
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,256,1,0,0.0708
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1,1,0,0.0966
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16,1,0,0.0632
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,256,1,0,0.1099
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,512,1,0,0.0441
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,512,1,0,0.0729
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,8192,1,0,0.0757
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4,1,0,0.0969
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,512,1,0,0.1085
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1024,1,0,0.0495
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1024,1,0,0.0762
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,8,1,0,0.0967
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1024,1,0,0.1151
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,2048,1,0,0.0570
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,16,1,0,0.0969
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,8,16384,1,0,0.0934
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,32,1,0,0.0974
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,2048,1,0,0.1217
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4096,1,0,0.0655
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1,1,0,0.0459
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,64,1,0,0.0984
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4,1,0,0.0458
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4096,1,0,0.1385
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,128,1,0,0.1005
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,8,1,0,0.0460
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,16,1,0,0.0468
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,256,1,0,0.1086
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,8192,1,0,0.1669
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,32,1,0,0.0516
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,512,1,0,0.1117
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,64,1,0,0.0505
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,16384,1,0,0.1286
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,1024,1,0,0.1162
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1,1,0,0.0477
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,2048,1,0,0.1227
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,16384,1,0,0.2238
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4,1,0,0.0481
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,32,1,0,0.0733
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,4096,1,0,0.1366
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,8,1,0,0.0485
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,64,1,0,0.0701
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,16,1,0,0.0481
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,128,1,0,0.0693
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,2048,1,0,0.0796
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,8192,1,0,0.1632
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,256,1,0,0.0695
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4096,1,0,0.0862
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,512,1,0,0.0718
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,1024,1,0,0.0743
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,16,1,0,0.0352
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,2048,1,0,0.0751
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,32,1,0,0.0377
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,16,16384,1,0,0.2164
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,4096,1,0,0.0840
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,64,1,0,0.0372
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1,1,0,0.0957
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,128,1,0,0.0372
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8192,1,0,0.0929
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,128,1,0,0.0505
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4,1,0,0.0953
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,256,1,0,0.0527
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,8,1,0,0.0958
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16384,1,0,0.1095
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,512,1,0,0.0550
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,16,1,0,0.0963
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,1024,1,0,0.0572
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1,1,0,0.0929
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1,1,0,0.0613
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,32,1,0,0.0962
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,2048,1,0,0.0645
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4,1,0,0.0615
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4,1,0,0.0906
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,64,1,0,0.0973
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,32,1,0,0.0486
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,8,1,0,0.0616
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,4096,1,0,0.0745
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,8,1,0,0.0927
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,16,1,0,0.0618
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,64,1,0,0.0503
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,8192,1,0,0.0952
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,8192,1,0,0.0840
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,16,1,0,0.0932
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,32,1,0,0.0717
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,8192,1,0,0.0922
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,64,1,0,0.0698
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,8,16384,1,0,0.1132
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,128,1,0,0.0696
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,256,1,0,0.0713
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1,1,0,0.0630
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,256,1,0,0.0387
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,16384,1,0,0.1182
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,512,1,0,0.0731
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,512,1,0,0.0407
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4,1,0,0.0626
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1024,1,0,0.0746
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,1024,1,0,0.0444
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,8,1,0,0.0627
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,2048,1,0,0.0826
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,16,1,0,0.0629
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,2048,1,0,0.0566
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,32,1,0,0.0633
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4096,1,0,0.0922
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,4096,1,0,0.0654
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,64,1,0,0.0643
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,128,1,0,0.0985
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,128,1,0,0.0665
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,256,1,0,0.1142
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,128,1,0,0.0519
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,8192,1,0,0.0826
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,256,1,0,0.0732
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,32,1,0,0.0932
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,512,1,0,0.1174
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,256,1,0,0.0585
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,512,1,0,0.0750
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,64,1,0,0.0952
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1024,1,0,0.1246
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,512,1,0,0.0613
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,1024,1,0,0.0790
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,16,16384,1,0,0.1269
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,16,16384,1,0,0.1183
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1024,1,0,0.0660
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,2048,1,0,0.1376
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1,1,0,0.0346
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1,1,0,0.0457
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1,1,0,0.0375
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4,1,0,0.0350
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4,1,0,0.0466
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4,1,0,0.0375
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,8,1,0,0.0346
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4096,1,0,0.1645
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,8,1,0,0.0460
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,16,1,0,0.0353
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,16,1,0,0.0466
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,32,1,0,0.0377
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1,1,0,0.0962
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,32,1,0,0.0524
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,64,1,0,0.0384
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,4,1,0,0.0953
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,64,1,0,0.0526
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,128,1,0,0.0394
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,8192,1,0,0.1097
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,8,1,0,0.0956
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,128,1,0,0.0538
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,256,1,0,0.0423
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,16,1,0,0.0957
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,256,1,0,0.0563
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,512,1,0,0.0454
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,32,1,0,0.0955
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,512,1,0,0.0576
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1024,1,0,0.0595
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,16384,1,0,0.1448
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,64,1,0,0.0972
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,1024,1,0,0.0697
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,2048,1,0,0.0677
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,2048,1,0,0.0833
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,128,1,0,0.0991
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1,1,0,0.0611
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,2048,1,0,0.0776
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4,1,0,0.0612
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4096,1,0,0.0854
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,256,1,0,0.1071
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,8,1,0,0.0612
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,8,1,0,0.0378
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1,1,0,0.0333
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,512,1,0,0.1131
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,16,1,0,0.0613
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,16,1,0,0.0377
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,4,1,0,0.0338
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,32,1,0,0.0382
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,32,1,0,0.0726
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1024,1,0,0.1268
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,8,1,0,0.0337
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,64,1,0,0.0391
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,64,1,0,0.0732
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,128,1,0,0.0412
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,128,1,0,0.0740
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,2048,1,0,0.1533
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,256,1,0,0.0460
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,256,1,0,0.0763
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,128,1,0,0.0965
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,512,1,0,0.0771
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,256,1,0,0.1108
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1024,1,0,0.0887
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,512,1,0,0.1202
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,4096,1,0,0.2067
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,4096,1,0,0.0933
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,2048,1,0,0.0977
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,1024,1,0,0.1280
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4096,1,0,0.0948
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1,1,0,0.1164
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,2048,1,0,0.1432
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1,1,0,0.0475
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4096,1,0,0.1138
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,4,1,0,0.1162
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,4,1,0,0.0471
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1,1,0,0.0631
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,32,4096,1,0,0.1721
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,8,1,0,0.1167
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,16,1,0,0.0339
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,4,1,0,0.0632
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,16,1,0,0.1164
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1,1,0,0.0920
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,8,1,0,0.0631
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,512,1,0,0.0522
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,32,1,0,0.1163
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,16,1,0,0.0637
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,1024,1,0,0.0584
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,2048,1,0,0.0754
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,32,1,0,0.0635
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,2048,1,0,0.0665
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,64,1,0,0.0655
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,32,4096,1,0,0.0933
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,128,1,0,0.0668
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,8192,1,0,0.1107
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,32,4096,1,0,0.0846
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,256,1,0,0.0817
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,8,1,0,0.0471
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,512,1,0,0.0928
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,16,1,0,0.0477
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,16,16384,1,0,0.1455
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,32,1,0,0.0336
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,32,1,0,0.0476
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1024,1,0,0.1076
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,64,1,0,0.0358
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1,1,0,0.0631
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,4,1,0,0.0920
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4,1,0,0.0640
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,2048,1,0,0.1250
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,8,1,0,0.0916
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,64,1,0,0.1199
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,8,1,0,0.0631
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,16,1,0,0.0922
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,128,1,0,0.1229
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,16,1,0,0.0639
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,32,1,0,0.0918
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,32,1,0,0.0641
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,4096,1,0,0.1594
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,256,1,0,0.1390
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,64,1,0,0.0935
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,64,1,0,0.0654
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1,1,0,0.0501
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1,1,0,0.0720
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,128,1,0,0.0950
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,512,1,0,0.1534
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,128,1,0,0.0658
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1,1,0,0.0366
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,4,1,0,0.0719
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,256,1,0,0.1244
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,256,1,0,0.0776
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,8,1,0,0.0718
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,512,1,0,0.1394
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1024,1,0,0.1808
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,64,1,0,0.0496
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,1024,1,0,0.1581
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,128,1,0,0.0514
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,128,1,0,0.0379
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,256,1,0,0.0603
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,256,1,0,0.0488
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,2048,1,0,0.1876
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,2048,1,0,0.2341
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,512,1,0,0.0716
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,512,1,0,0.0607
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1024,1,0,0.0847
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,1,1,0,0.1730
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1024,1,0,0.0732
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,64,4096,1,0,0.2440
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,4,1,0,0.1743
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,2048,1,0,0.1012
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,4,1,0,0.0497
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,2048,1,0,0.0903
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1,1,0,0.1129
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,8,1,0,0.1742
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,4,1,0,0.0365
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,4,1,0,0.1125
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,8,1,0,0.0369
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,512,1,0,0.0808
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,4096,1,0,0.1378
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,8,1,0,0.1131
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,16,1,0,0.0724
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,16,1,0,0.0366
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,4096,1,0,0.1259
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,1024,1,0,0.0863
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1,1,0,0.0535
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,32,1,0,0.0371
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,32,1,0,0.0723
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,4,1,0,0.0534
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1,1,0,0.0382
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,2048,1,0,0.0952
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,64,1,0,0.0737
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,8,1,0,0.0533
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,4,1,0,0.0382
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,128,1,0,0.0749
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,16,1,0,0.0536
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,8,1,0,0.0381
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,32,4096,1,0,0.1131
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,256,1,0,0.1076
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,32,1,0,0.0538
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,16,1,0,0.0391
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1,1,0,0.0661
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,64,1,0,0.0552
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,32,1,0,0.0400
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,4,1,0,0.0668
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,512,1,0,0.1272
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,64,1,0,0.0430
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,128,1,0,0.0569
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,8,1,0,0.0664
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,8,1,0,0.0505
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,16,1,0,0.0669
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,256,1,0,0.0794
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1024,1,0,0.1452
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,16,1,0,0.0503
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,512,1,0,0.0977
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,32,1,0,0.0507
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,16,1,0,0.1130
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,64,1,0,0.0526
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1024,1,0,0.1153
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,2048,1,0,0.1803
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,128,1,0,0.0539
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,32,1,0,0.1136
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,256,1,0,0.0641
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,64,1,0,0.1169
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,1,1,0,0.0954
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,512,1,0,0.0688
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,128,1,0,0.1204
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,2048,1,0,0.1509
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,4,1,0,0.0965
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,256,1,0,0.1285
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,8,1,0,0.0964
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,1,1,0,0.0689
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,512,1,0,0.1448
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,16,1,0,0.0970
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,4,1,0,0.0690
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,128,1,0,0.0465
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,32,1,0,0.0984
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,8,1,0,0.0692
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,16,1,0,0.1742
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,256,1,0,0.0656
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,1024,1,0,0.1744
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,16,1,0,0.0698
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,32,1,0,0.1753
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,512,1,0,0.0833
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,32,1,0,0.0703
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,64,1,0,0.0391
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,64,1,0,0.1812
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,128,2048,1,0,0.2323
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1024,1,0,0.1007
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,128,1,0,0.1878
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,1,1,0,0.1661
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,1024,1,0,0.0784
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,4,1,0,0.1672
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,2048,1,0,0.1364
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,256,1,0,0.2167
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,1,1,0,0.0520
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,8,1,0,0.1672
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,32,1,0,0.0672
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,512,1,0,0.2452
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,16,1,0,0.1672
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,4,1,0,0.0524
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,64,1,0,0.1020
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,8,1,0,0.0531
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,32,1,0,0.1694
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,128,1,0,0.1063
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,16,1,0,0.0538
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,64,1,0,0.1753
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,128,1,0,0.0416
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,1024,1,0,0.2985
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,32,1,0,0.0562
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,256,1,0,0.1185
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,256,1,0,0.0538
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,128,1,0,0.1821
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,64,1,0,0.0593
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,512,1,0,0.0583
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,512,1,0,0.1380
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,1,1,0,0.3042
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,128,1,0,0.0643
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,256,1,0,0.1986
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,1024,1,0,0.0681
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,4,1,0,0.3041
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,256,1,0,0.0751
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,2048,1,0,0.0971
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,2048,1,0,0.0863
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,512,1,0,0.2298
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,1024,1,0,0.1734
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,64,1,0,0.0680
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,8,1,0,0.3071
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,512,1,0,0.0937
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,128,1,0,0.0693
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,1,1,0,0.1631
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,64,4096,1,0,0.1211
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,256,1,0,0.0864
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,256,1024,1,0,0.2886
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,64,1,0,0.0730
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1,1,0,0.0429
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,512,1,0,0.0905
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,4,1,0,0.1629
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,4,1,0,0.0428
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,128,1,0,0.0784
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,1,1,0,0.2893
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,1024,1,0,0.1000
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,8,1,0,0.0428
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,8,1,0,0.1637
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,256,1,0,0.0900
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,4,1,0,0.2917
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,2048,1,0,0.1182
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,16,1,0,0.1657
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,512,1,0,0.1094
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,8,1,0,0.2912
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,32,1,0,0.1676
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,64,4096,1,0,0.1545
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,16,1,0,0.2909
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,64,1,0,0.1744
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,64,4096,1,0,0.1321
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1,1,0,0.0760
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,1024,1,0,0.1440
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,16,1,0,0.3075
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,4,1,0,0.0764
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,32,1,0,0.2928
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1,1,0,0.0574
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,1024,1,0,0.1286
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,128,1,0,0.1825
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,8,1,0,0.0766
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,1,1,0,0.1031
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,4,1,0,0.0568
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,32,1,0,0.3086
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,16,1,0,0.0768
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,64,1,0,0.3046
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,8,1,0,0.0573
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,4,1,0,0.1039
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,1,1,0,0.0799
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,32,1,0,0.0763
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,16,1,0,0.0568
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,256,1,0,0.2021
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,64,1,0,0.3182
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,64,1,0,0.0786
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,32,1,0,0.0573
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,4,1,0,0.0808
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,128,1,0,0.3191
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,128,1,0,0.0804
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,64,1,0,0.0597
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,8,1,0,0.0812
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,128,1,0,0.3317
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,256,1,0,0.0902
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,512,1,0,0.2424
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,16,1,0,0.0824
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,512,1,0,0.1002
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,256,1,0,0.3858
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,1024,1,0,0.1187
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,1,1,0,0.2929
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,16,1,0,0.0436
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,4,1,0,0.2938
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,32,1,0,0.0445
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,8,1,0,0.1042
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,512,1,0,0.4414
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,64,1,0,0.0475
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,16,1,0,0.1065
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,128,1,0,0.0619
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,128,1,0,0.0515
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,256,1,0,0.3498
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,1,1,0,0.5552
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,256,1,0,0.0613
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,256,1,0,0.0708
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,32,1,0,0.1088
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,32,1,0,0.0839
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,512,1,0,0.0703
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,512,1,0,0.0795
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,64,1,0,0.1165
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,64,1,0,0.0911
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,1024,1,0,0.0885
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,512,512,1,0,0.4095
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,128,2048,1,0,0.1534
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,128,1,0,0.1258
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,128,1,0,0.1003
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,128,2048,1,0,0.1248
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,1,1,0,0.1009
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,8,1,0,0.2942
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,1,1,0,0.5369
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,256,1,0,0.1458
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,1,1,0,0.0583
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,4,1,0,0.1016
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,4,1,0,0.0583
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,8,1,0,0.1014
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,8,1,0,0.0590
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,16,1,0,0.2943
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,16,1,0,0.1010
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,4,1,0,0.5414
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,16,1,0,0.0594
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,512,1,0,0.1839
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,32,1,0,0.1026
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,4,1,0,0.5587
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,32,1,0,0.0614
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,1024,1,0,0.0982
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,64,1,0,0.1056
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,32,1,0,0.2966
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,64,1,0,0.0660
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,8,1,0,0.5419
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,1,1,0,0.1849
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,128,1,0,0.1118
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,128,1,0,0.0714
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,8,1,0,0.5667
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,128,2048,1,0,0.1334
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,64,1,0,0.3084
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,256,1,0,0.1314
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,4,1,0,0.1866
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,256,1,0,0.0857
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,16,1,0,0.5413
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,256,1,0,0.1193
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,1,1,0,0.0739
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,16,1,0,0.5731
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,512,1,0,0.1510
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,4,1,0,0.0745
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,512,1,0,0.1051
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,8,1,0,0.1878
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,8,1,0,0.0744
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,128,1,0,0.3268
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,32,1,0,0.5433
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,512,1,0,0.1570
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,16,1,0,0.0749
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,256,1024,1,0,0.1877
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,32,1,0,0.5716
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,16,1,0,0.1902
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,256,1024,1,0,0.1406
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,32,1,0,0.0751
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,1,1,0,0.1724
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,1,1,0,0.1253
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,64,1,0,0.0787
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,64,1,0,0.5657
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,1,1,0,0.0913
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,32,1,0,0.1944
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,64,1,0,0.5874
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,128,1,0,0.0847
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,256,1,0,0.3670
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,4,1,0,0.0915
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,4,1,0,0.1274
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,256,1,0,0.1014
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,8,1,0,0.0916
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,64,1,0,0.2040
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,128,1,0,0.5937
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,8,1,0,0.1283
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,16,1,0,0.0922
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,512,1,0,0.1205
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,128,1,0,0.6135
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,32,1,0,0.0929
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,128,1,0,0.2205
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,16,1,0,0.1317
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,64,1,0,0.1013
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,256,1024,1,0,0.1570
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,256,1,0,0.6581
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,32,1,0,0.1373
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,128,1,0,0.1111
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,4,1,0,0.1726
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,128,1024,256,1,0,0.7220
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,8,1,0,0.1734
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,256,1,0,0.2591
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,256,1,0,0.1382
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,64,1,0,0.1479
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,16,1,0,0.1742
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,32,1,0,0.1757
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,128,1,0,0.1649
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,64,1,0,0.1824
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,1,1,0,0.1134
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,128,1,0,0.1916
SGLang,0.5.10,NVIDIA H200,mla_generation,flashinfer,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,256,1,0,0.2031
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,4,1,0,0.1135
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,256,1,0,0.2284
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,8,1,0,0.1144
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,16,1,0,0.1152
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,512,512,1,0,0.1751
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,32,1,0,0.1175
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,512,512,1,0,0.2657
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,64,1,0,0.1257
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,1,1,0,0.1419
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,1,1,0,0.3071
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,128,1,0,0.1362
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,4,1,0,0.1419
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,4,1,0,0.3097
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,256,1,0,0.1659
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,8,1,0,0.1444
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,8,1,0,0.3101
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,16,1,0,0.1456
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,512,512,1,0,0.2037
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,16,1,0,0.3121
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,32,1,0,0.1482
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,1,1,0,0.1974
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,32,1,0,0.3122
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,64,1,0,0.1633
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,4,1,0,0.1984
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,64,1,0,0.3236
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,128,1,0,0.1806
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,128,1,0,0.3412
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,16,1024,256,1,0,0.2292
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,64,1024,256,1,0,0.4079
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,8,1,0,0.1989
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,16,1,0,0.2006
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,32,1,0,0.2030
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,64,1,0,0.2167
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,128,1,0,0.2340
SGLang,0.5.10,NVIDIA H200,mla_generation,fa3,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8_block,fp8,fp8_block,32,1024,256,1,0,0.2896
