framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,32,1,0,0.1592
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1536,1,0,0.7351
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,16,1,0,0.1539
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,64,1,0,0.1720
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,128,1,0,0.1828
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,512,1,0,0.3090
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1024,1,0,0.5102
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,256,1,0,0.2150
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,2048,1,0,0.9765
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,3072,1,0,1.5033
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,4096,1,0,2.0899
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,6144,1,0,3.2731
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,8192,1,0,4.7110
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,10240,1,0,6.2146
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,12288,1,0,8.1572
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,16,1,0,0.1633
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,16384,1,0,12.1993
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,32,1,0,0.1748
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,64,1,0,0.1877
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,128,1,0,0.2150
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,256,1,0,0.3006
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,512,1,0,0.4920
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1024,1,0,0.9189
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1536,1,0,1.3785
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,2048,1,0,1.8799
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,3072,1,0,2.8096
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,4096,1,0,3.9296
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,6144,1,0,6.5587
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,8192,1,0,9.5231
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,32768,1,0,34.6903
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,16,1,0,0.1786
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,10240,1,0,12.6537
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,32,1,0,0.1913
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,64,1,0,0.2200
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,12288,1,0,15.9314
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,128,1,0,0.3026
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,256,1,0,0.4844
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,512,1,0,0.8900
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1024,1,0,1.7725
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,16384,1,0,23.7056
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1536,1,0,2.5772
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,2048,1,0,3.5325
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,3072,1,0,5.6890
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,4096,1,0,8.0556
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,6144,1,0,12.7901
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,16,1,0,0.2013
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,8192,1,0,18.4358
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,32,1,0,0.2285
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,64,1,0,0.3101
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,10240,1,0,25.1225
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,128,1,0,0.4849
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,256,1,0,0.8751
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,512,1,0,1.7234
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,12288,1,0,32.1593
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1024,1,0,3.3479
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,32768,1,0,68.8797
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1536,1,0,5.2245
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,2048,1,0,7.2333
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,3072,1,0,11.0677
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,16384,1,0,46.9965
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,4096,1,0,15.4986
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,16,1,0,0.2481
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,32,1,0,0.3285
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,6144,1,0,25.9471
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,64,1,0,0.5010
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,128,1,0,0.8786
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,256,1,0,1.6905
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,512,1,0,3.2104
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,8192,1,0,36.5340
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1024,1,0,6.8390
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,10240,1,0,49.1100
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1536,1,0,10.1589
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,2048,1,0,13.8809
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,12288,1,0,60.5884
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,3072,1,0,22.5267
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,16,1,0,0.3650
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,32,1,0,0.5395
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,64,1,0,0.9125
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,4096,1,0,30.7421
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,128,1,0,1.6925
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,256,1,0,3.1653
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,512,1,0,6.6135
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,32768,1,0,134.4242
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1024,1,0,13.0808
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,16384,1,0,90.8979
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,6144,1,0,48.1653
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1536,1,0,20.7864
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,16,1,0,0.6167
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,32,1,0,0.9867
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,64,1,0,1.7637
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,2048,1,0,27.5493
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,128,1,0,3.1962
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,256,1,0,6.5683
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,8192,1,0,69.9241
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,512,1,0,12.6643
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,3072,1,0,41.3755
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,16,1,0,1.1419
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,32,1,0,1.9151
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,64,1,0,3.3292
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1024,1,0,25.9217
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,128,1,0,6.5500
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,4096,1,0,58.2935
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,256,1,0,12.5432
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,16,1,0,2.2184
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1536,1,0,37.8778
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,32,1,0,3.6259
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,64,1,0,6.8631
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,512,1,0,25.0932
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,2048,1,0,52.0095
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,16,1,0,0.1237
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,64,1,0,0.1337
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,32,1,0,0.1299
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,128,1,0,12.5537
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,128,1,0,0.1419
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,512,1,0,0.2160
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,256,1,0,0.1669
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1024,1,0,0.3417
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1536,1,0,0.4623
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,2048,1,0,0.6121
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,3072,1,0,0.8655
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,4096,1,0,1.1801
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,256,1,0,24.8225
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,6144,1,0,1.9203
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,8192,1,0,2.7335
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1024,1,0,48.7644
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,10240,1,0,3.5869
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,12288,1,0,4.4950
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,16,1,0,0.1303
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,16384,1,0,6.6000
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,32,1,0,0.1327
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,128,1,0,0.1600
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,64,1,0,0.1450
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,256,1,0,0.2146
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,512,1,0,0.3304
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1024,1,0,0.5769
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1536,1,0,0.7952
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,2048,1,0,1.0595
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,3072,1,0,1.6776
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,4096,1,0,2.3442
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,32768,1,0,18.7221
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,512,1,0,47.1182
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,6144,1,0,3.6943
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,8192,1,0,5.2266
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,10240,1,0,6.8929
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,16,1,0,0.1359
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,32,1,0,0.1446
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,12288,1,0,8.7557
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,64,1,0,0.1671
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,128,1,0,0.2147
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,256,1,0,0.3223
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,512,1,0,0.5578
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,16384,1,0,13.1115
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1024,1,0,1.0031
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1536,1,0,1.5517
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,2048,1,0,2.1286
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,3072,1,0,3.2214
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,4096,1,0,4.4734
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,6144,1,0,7.1690
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,8192,1,0,10.4371
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,16,1,0,0.1501
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,32,1,0,0.1688
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,10240,1,0,13.8607
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,64,1,0,0.2178
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,32768,1,0,36.9450
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,128,1,0,0.3244
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,256,1,0,0.5503
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,12288,1,0,17.3205
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,512,1,0,0.9731
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1024,1,0,2.0098
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1536,1,0,2.9865
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,16384,1,0,25.9276
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,2048,1,0,4.0611
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,3072,1,0,6.3036
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,4096,1,0,8.9491
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,16,1,0,0.1767
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,6144,1,0,14.2022
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,32,1,0,0.2286
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,64,1,0,0.3309
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,128,1,0,0.5506
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,8192,1,0,20.6581
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,256,1,0,0.9580
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,512,1,0,1.9527
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1024,1,0,3.8494
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,10240,1,0,27.5682
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1536,1,0,5.8489
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,2048,1,0,8.1519
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,12288,1,0,35.0515
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,3072,1,0,12.4713
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,32768,1,0,73.5651
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,32,1,0,0.3502
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,16,1,0,0.2483
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,4096,1,0,17.7070
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,64,1,0,0.5684
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,128,1,0,0.9570
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,16384,1,0,51.7347
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,256,1,0,1.9354
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,512,1,0,3.7450
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1024,1,0,7.7440
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,6144,1,0,28.8014
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,16,1,0,0.3887
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1536,1,0,11.5768
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,32,1,0,0.6052
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,64,1,0,1.0002
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,2048,1,0,16.1178
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,128,1,0,1.9393
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,8192,1,0,41.2484
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,256,1,0,3.7053
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,512,1,0,7.5259
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,3072,1,0,25.3886
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,16,1,0,0.6812
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,32,1,0,1.0751
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1024,1,0,15.3049
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,64,1,0,2.0074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,128,1,0,3.7081
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,4096,1,0,35.4498
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,256,1,0,7.4527
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,16,1,0,1.2240
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1536,1,0,23.6527
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,32,1,0,2.1614
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,64,1,0,3.8541
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,512,1,0,14.8832
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,2048,1,0,32.2569
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,128,1,0,7.4647
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,16,1,0,0.1078
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,32,1,0,0.1113
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,64,1,0,0.1186
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,128,1,0,0.1237
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,256,1,0,0.1357
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,512,1,0,0.1664
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1024,1,0,0.2309
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,256,1,0,14.7578
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1536,1,0,0.3023
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,2048,1,0,0.3838
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,3072,1,0,0.5319
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,4096,1,0,0.7059
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1024,1,0,30.6078
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,6144,1,0,1.1286
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,8192,1,0,1.5642
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,10240,1,0,2.0474
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,12288,1,0,2.5663
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,16,1,0,0.1118
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,32,1,0,0.1152
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,16384,1,0,3.7281
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,64,1,0,0.1223
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,128,1,0,0.1301
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,256,1,0,0.1635
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,512,1,0,29.7858
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,512,1,0,0.2230
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1024,1,0,0.3622
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1536,1,0,0.4888
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,2048,1,0,0.6459
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,32768,1,0,10.0463
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,3072,1,0,0.9929
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,4096,1,0,1.3466
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,6144,1,0,2.1248
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,8192,1,0,3.0113
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,10240,1,0,3.9097
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,16,1,0,0.1189
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,12288,1,0,4.9306
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,32,1,0,0.1197
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,64,1,0,0.1329
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,16384,1,0,7.1809
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,128,1,0,0.1605
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,256,1,0,0.2214
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,512,1,0,0.3506
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1024,1,0,0.6020
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1536,1,0,0.9225
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,2048,1,0,1.2313
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,3072,1,0,1.8899
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,4096,1,0,2.6000
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,6144,1,0,4.1059
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,32768,1,0,19.9099
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,8192,1,0,5.8083
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,10240,1,0,7.6431
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,16,1,0,0.1234
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,32,1,0,0.1364
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,12288,1,0,9.7226
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,64,1,0,0.1659
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,128,1,0,0.2218
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,256,1,0,0.3432
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,512,1,0,0.5913
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,16384,1,0,14.3132
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1024,1,0,1.1668
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1536,1,0,1.7653
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,2048,1,0,2.3869
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,3072,1,0,3.6661
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,4096,1,0,5.0423
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,6144,1,0,8.1230
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,16,1,0,0.1426
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,32,1,0,0.1684
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,8192,1,0,11.6327
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,64,1,0,0.2280
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,128,1,0,0.3463
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,10240,1,0,15.4324
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,256,1,0,0.5777
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,512,1,0,1.1440
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,12288,1,0,19.1660
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1024,1,0,2.2804
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,32768,1,0,39.4920
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1536,1,0,3.4269
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,2048,1,0,4.6370
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,16,1,0,0.1774
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,3072,1,0,7.2516
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,32,1,0,0.2348
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,16384,1,0,28.4262
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,64,1,0,0.3534
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,4096,1,0,10.1370
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,128,1,0,0.5766
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,256,1,0,1.1269
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,512,1,0,2.2180
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,6144,1,0,16.0404
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1024,1,0,4.4224
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1536,1,0,6.7990
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,16,1,0,0.2545
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,2048,1,0,9.3626
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,32,1,0,0.3723
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,8192,1,0,23.1945
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,64,1,0,0.6033
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,128,1,0,1.1245
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,3072,1,0,14.3111
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,256,1,0,2.1981
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,512,1,0,4.3140
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,4096,1,0,20.2487
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,16,1,0,0.4094
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,32,1,0,0.6324
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1024,1,0,8.9382
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,64,1,0,1.1613
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,128,1,0,2.2136
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1536,1,0,13.4296
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,256,1,0,4.2698
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,16,1,0,0.7164
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,2048,1,0,18.6761
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,32,1,0,1.2381
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,512,1,0,8.7193
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,64,1,0,2.2737
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,128,1,0,4.2863
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,16,1,0,0.0908
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,32,1,0,0.0992
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,64,1,0,0.1032
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1024,1,0,17.8496
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,128,1,0,0.1071
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,256,1,0,0.1236
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,512,1,0,0.1420
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1024,1,0,0.1853
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,256,1,0,8.6501
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1536,1,0,0.2238
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,2048,1,0,0.2634
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,3072,1,0,0.3680
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,4096,1,0,0.4882
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,6144,1,0,0.7312
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,8192,1,0,0.9853
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,10240,1,0,1.2731
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,12288,1,0,1.5734
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,512,1,0,17.4141
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,16384,1,0,2.2652
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,16,1,0,0.0971
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,32,1,0,0.1035
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,64,1,0,0.1081
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,128,1,0,0.1166
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,32768,1,0,5.8288
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,256,1,0,0.1378
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,512,1,0,0.1775
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1024,1,0,0.2513
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1536,1,0,0.3434
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,2048,1,0,0.4401
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,3072,1,0,0.6483
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,4096,1,0,0.8595
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,6144,1,0,1.3319
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,8192,1,0,1.8588
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,10240,1,0,2.4047
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,16,1,0,0.1027
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,12288,1,0,3.0074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,32,1,0,0.1074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,16384,1,0,4.3197
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,64,1,0,0.1202
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,128,1,0,0.1358
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,256,1,0,0.1748
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,512,1,0,0.2436
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1024,1,0,0.4209
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1536,1,0,0.6083
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,32768,1,0,11.2579
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,2048,1,0,0.7975
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,3072,1,0,1.1981
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,4096,1,0,1.6441
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,6144,1,0,2.5772
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,8192,1,0,3.5889
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,16,1,0,0.1074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,10240,1,0,4.6796
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,32,1,0,0.1173
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,12288,1,0,5.9027
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,128,1,0,0.1723
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,64,1,0,0.1366
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,256,1,0,0.2404
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,512,1,0,0.4043
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,16384,1,0,8.3842
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1024,1,0,0.7639
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1536,1,0,1.1329
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,2048,1,0,1.5281
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,3072,1,0,2.3390
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,4096,1,0,3.1859
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,6144,1,0,5.0851
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,16,1,0,0.1232
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,8192,1,0,7.0202
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,32,1,0,0.1382
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,32768,1,0,22.4162
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,64,1,0,0.1753
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,10240,1,0,9.2347
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,128,1,0,0.2431
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,256,1,0,0.3967
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,512,1,0,0.7445
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,12288,1,0,11.5682
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1024,1,0,1.4709
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1536,1,0,2.2129
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,16384,1,0,16.8688
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,2048,1,0,2.9746
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,3072,1,0,4.6315
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,16,1,0,0.1437
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,4096,1,0,6.2697
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,32,1,0,0.1810
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,64,1,0,0.2465
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,128,1,0,0.3981
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,256,1,0,0.7292
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,6144,1,0,9.9870
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,512,1,0,1.4437
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1024,1,0,2.8646
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,8192,1,0,14.1622
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,16,1,0,0.1903
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1536,1,0,4.4000
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,2048,1,0,5.8618
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,32,1,0,0.2545
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,64,1,0,0.4098
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,128,1,0,0.7318
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,3072,1,0,9.1435
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,512,1,0,2.8112
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,256,1,0,1.4270
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,4096,1,0,12.6805
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1024,1,0,5.6407
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,16,1,0,0.2750
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,32,1,0,0.4256
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,64,1,0,0.7494
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1536,1,0,8.6619
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,128,1,0,1.4304
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,256,1,0,2.7928
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,2048,1,0,11.8981
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,16,1,0,0.4621
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,512,1,0,5.5409
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,32,1,0,0.7872
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,64,1,0,1.4682
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,16,1,0,0.0888
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,32,1,0,0.0964
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,128,1,0,2.7925
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1024,1,0,11.4694
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,64,1,0,0.1010
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,128,1,0,0.1031
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,256,1,0,0.1155
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,512,1,0,0.1271
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,256,1,0,5.5023
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1024,1,0,0.1626
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1536,1,0,0.1890
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,2048,1,0,0.2231
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,3072,1,0,0.2897
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,4096,1,0,0.3565
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,6144,1,0,0.5291
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,8192,1,0,0.7167
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,512,1,0,11.2701
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,12288,1,0,1.1042
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,10240,1,0,0.9148
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,16384,1,0,1.5354
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,32768,1,0,3.6906
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,16,1,0,0.0930
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,32,1,0,0.0989
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,64,1,0,0.1055
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,128,1,0,0.1099
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,256,1,0,0.1236
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,512,1,0,0.1558
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1024,1,0,0.2091
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1536,1,0,0.2713
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,2048,1,0,0.3305
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,3072,1,0,0.4902
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,4096,1,0,0.6335
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,6144,1,0,0.9512
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,8192,1,0,1.2996
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,10240,1,0,1.6509
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,12288,1,0,2.0341
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,16384,1,0,2.8695
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,16,1,0,0.0992
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,32,1,0,0.1053
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,64,1,0,0.1149
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,32768,1,0,7.0498
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,128,1,0,0.1221
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,256,1,0,0.1525
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,512,1,0,0.2037
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1024,1,0,0.3184
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1536,1,0,0.4623
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,2048,1,0,0.5987
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,3072,1,0,0.8772
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,4096,1,0,1.1729
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,6144,1,0,1.7921
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,8192,1,0,2.4653
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,10240,1,0,3.1855
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,16,1,0,0.1032
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,12288,1,0,3.9918
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,32,1,0,0.1105
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,16384,1,0,5.5528
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,64,1,0,0.1237
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,128,1,0,0.1491
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,256,1,0,0.1995
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,512,1,0,0.3124
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1024,1,0,0.5669
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1536,1,0,0.8343
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,2048,1,0,1.1036
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,3072,1,0,1.6617
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,4096,1,0,2.2493
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,32768,1,0,13.8374
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,6144,1,0,3.5659
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,8192,1,0,4.8177
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,16,1,0,0.1092
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,32,1,0,0.1236
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,64,1,0,0.1490
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,10240,1,0,6.2843
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,128,1,0,0.1980
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,256,1,0,0.3118
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,12288,1,0,7.7850
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,512,1,0,0.5560
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1024,1,0,1.0711
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,16384,1,0,10.9591
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1536,1,0,1.5957
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,2048,1,0,2.1328
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,3072,1,0,3.3290
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,4096,1,0,4.4269
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,32,1,0,0.1522
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,16,1,0,0.1235
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,64,1,0,0.2013
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,128,1,0,0.3164
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,256,1,0,0.5489
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,6144,1,0,6.9470
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,512,1,0,1.0531
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1024,1,0,2.0777
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,8192,1,0,9.5744
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1536,1,0,3.2034
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,16,1,0,0.1572
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,2048,1,0,4.2190
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,32,1,0,0.2054
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,64,1,0,0.3163
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,128,1,0,0.5491
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,3072,1,0,6.5117
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,256,1,0,1.0464
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,512,1,0,2.0441
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,4096,1,0,8.8354
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,16,1,0,0.2153
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1024,1,0,4.1102
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,32,1,0,0.3294
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,64,1,0,0.5572
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1536,1,0,6.2759
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,128,1,0,1.0462
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,256,1,0,2.0313
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,2048,1,0,8.4187
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,512,1,0,4.0603
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,16,1,0,0.3431
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,32,1,0,0.5783
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,64,1,0,1.0617
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,128,1,0,2.0362
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1024,1,0,8.2145
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,16,1,0,0.0885
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,32,1,0,0.0914
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,256,1,0,4.0351
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,64,1,0,0.0981
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,128,1,0,0.1031
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,256,1,0,0.1116
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,512,1,0,0.1239
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1024,1,0,0.1525
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1536,1,0,0.1740
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,2048,1,0,0.2035
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,512,1,0,8.1009
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,3072,1,0,0.2589
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,4096,1,0,0.3216
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,6144,1,0,0.4458
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,8192,1,0,0.5645
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,10240,1,0,0.7007
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,12288,1,0,0.8567
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,16384,1,0,1.1909
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,16,1,0,0.0930
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,32,1,0,0.0974
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,32768,1,0,2.6447
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,64,1,0,0.1011
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,128,1,0,0.1095
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,256,1,0,0.1214
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,512,1,0,0.1450
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1024,1,0,0.1913
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1536,1,0,0.2402
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,2048,1,0,0.2962
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,3072,1,0,0.4083
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,4096,1,0,0.5134
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,6144,1,0,0.7626
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,8192,1,0,1.0399
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,10240,1,0,1.2996
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,12288,1,0,1.5756
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,32,1,0,0.1010
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,16,1,0,0.1000
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,16384,1,0,2.1602
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,64,1,0,0.1097
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,128,1,0,0.1172
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,32768,1,0,4.9707
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,256,1,0,0.1419
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,512,1,0,0.1831
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1024,1,0,0.2852
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1536,1,0,0.3929
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,2048,1,0,0.4920
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,3072,1,0,0.7151
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,4096,1,0,0.9602
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,6144,1,0,1.4260
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,8192,1,0,1.9223
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,10240,1,0,2.4559
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,16,1,0,0.1010
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,12288,1,0,3.0614
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,16384,1,0,4.1406
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,32,1,0,0.1070
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,64,1,0,0.1155
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,128,1,0,0.1393
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,256,1,0,0.1801
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,512,1,0,0.2780
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1024,1,0,0.4770
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1536,1,0,0.6934
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,32768,1,0,9.7084
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,2048,1,0,0.9173
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,3072,1,0,1.3474
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,4096,1,0,1.8000
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,6144,1,0,2.8125
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,8192,1,0,3.7396
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,10240,1,0,4.8506
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,32,1,0,0.1173
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,64,1,0,0.1380
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,16,1,0,0.1073
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,12288,1,0,5.9236
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,128,1,0,0.1769
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,256,1,0,0.2769
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,512,1,0,0.4724
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1024,1,0,0.8966
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,16384,1,0,8.1803
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1536,1,0,1.3047
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,2048,1,0,1.7321
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,3072,1,0,2.6890
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,16,1,0,0.1174
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,4096,1,0,3.5277
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,32,1,0,0.1395
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,64,1,0,0.1772
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,128,1,0,0.2764
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,6144,1,0,5.4943
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,256,1,0,0.4669
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,512,1,0,0.8844
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,8192,1,0,7.4682
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1024,1,0,1.6981
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1536,1,0,2.6171
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,2048,1,0,3.4115
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,3072,1,0,5.2570
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,16,1,0,0.1437
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,32,1,0,0.1798
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,64,1,0,0.2747
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,128,1,0,0.4725
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,4096,1,0,7.0636
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,256,1,0,0.8816
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,512,1,0,1.6803
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1024,1,0,3.3522
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,16,1,0,0.1849
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,32,1,0,0.2812
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,64,1,0,0.4828
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,128,1,0,0.8779
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1536,1,0,5.1281
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,256,1,0,1.6735
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,2048,1,0,6.8473
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,512,1,0,3.3250
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,32,1,0,0.4888
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,64,1,0,0.8900
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,16,1,0,0.2895
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,128,1,0,1.6725
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1024,1,0,6.7381
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,16,1,0,0.0892
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,32,1,0,0.0931
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,256,1,0,3.3094
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,64,1,0,0.0952
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,128,1,0,0.1018
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,256,1,0,0.1098
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,512,1,0,0.1216
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,512,1,0,6.6849
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1024,1,0,0.1490
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1536,1,0,0.1695
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,2048,1,0,0.1951
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,3072,1,0,0.2514
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,4096,1,0,0.3045
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,8192,1,0,0.5324
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,6144,1,0,0.4181
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,10240,1,0,0.6420
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,12288,1,0,0.7609
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,16,1,0,0.0910
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,16384,1,0,0.9998
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,32768,1,0,2.1338
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,32,1,0,0.0993
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,64,1,0,0.1012
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,128,1,0,0.1074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,256,1,0,0.1173
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,512,1,0,0.1421
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1024,1,0,0.1821
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,2048,1,0,0.2883
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,3072,1,0,0.3813
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,4096,1,0,0.4783
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,6144,1,0,0.6880
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,8192,1,0,0.8956
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,10240,1,0,1.1078
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,12288,1,0,1.3483
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1536,1,0,0.2327
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,16,1,0,0.0955
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,32,1,0,0.1013
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,16384,1,0,1.8339
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,64,1,0,0.1071
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,128,1,0,0.1159
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,32768,1,0,3.9460
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,256,1,0,0.1381
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,512,1,0,0.1756
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1024,1,0,0.2670
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1536,1,0,0.3639
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,2048,1,0,0.4550
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,3072,1,0,0.6468
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,4096,1,0,0.8489
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,6144,1,0,1.2551
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,8192,1,0,1.6821
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,10240,1,0,2.1196
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,16,1,0,0.1030
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,12288,1,0,2.6299
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,32,1,0,0.1052
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,16384,1,0,3.4689
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,64,1,0,0.1134
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,128,1,0,0.1370
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,256,1,0,0.1742
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,512,1,0,0.2626
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1024,1,0,0.4408
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,32768,1,0,7.6708
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1536,1,0,0.6323
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,2048,1,0,0.8214
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,3072,1,0,1.2057
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,4096,1,0,1.6034
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,6144,1,0,2.4754
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,8192,1,0,3.2343
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,32,1,0,0.1135
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,10240,1,0,4.1755
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,64,1,0,0.1368
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,12288,1,0,5.0478
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,16,1,0,0.1078
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,128,1,0,0.1706
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,16384,1,0,6.8550
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,256,1,0,0.2599
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,512,1,0,0.4389
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1024,1,0,0.8064
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1536,1,0,1.1818
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,2048,1,0,1.5616
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,16,1,0,0.1135
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,3072,1,0,2.3936
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,32,1,0,0.1363
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,64,1,0,0.1703
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,6144,1,0,4.8109
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,128,1,0,0.2556
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,4096,1,0,3.1126
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,256,1,0,0.4308
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,512,1,0,0.8036
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,8192,1,0,6.4511
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1024,1,0,1.5383
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1536,1,0,2.3554
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,16,1,0,0.1359
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,2048,1,0,3.0444
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,32,1,0,0.1704
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,3072,1,0,4.6756
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,64,1,0,0.2538
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,128,1,0,0.4329
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,4096,1,0,6.2545
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,256,1,0,0.8003
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,512,1,0,1.5304
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1024,1,0,3.0114
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,16,1,0,0.1737
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,32,1,0,0.2576
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1536,1,0,4.6139
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,64,1,0,0.4369
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,128,1,0,0.7982
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,2048,1,0,6.1142
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,256,1,0,1.5224
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,512,1,0,2.9912
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,16,1,0,0.2640
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,32,1,0,0.4427
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,64,1,0,0.8135
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1024,1,0,6.0569
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,128,1,0,1.5223
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,16,1,0,0.0887
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,32,1,0,0.0902
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,256,1,0,2.9818
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,64,1,0,0.0931
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,128,1,0,0.1012
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,256,1,0,0.1115
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,512,1,0,0.1197
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1024,1,0,0.1443
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,512,1,0,6.0359
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,2048,1,0,0.1899
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1536,1,0,0.1644
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,3072,1,0,0.2468
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,4096,1,0,0.2996
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,6144,1,0,0.3995
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,8192,1,0,0.5112
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,10240,1,0,0.6223
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,12288,1,0,0.7394
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,16,1,0,0.0906
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,32,1,0,0.0914
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,32768,1,0,1.8510
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,64,1,0,0.0971
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,128,1,0,0.1053
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,16384,1,0,0.9673
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,256,1,0,0.1141
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,512,1,0,0.1379
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1024,1,0,0.1771
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1536,1,0,0.2260
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,2048,1,0,0.2684
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,3072,1,0,0.3606
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,4096,1,0,0.4571
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,6144,1,0,0.6693
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,8192,1,0,0.8624
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,10240,1,0,1.0430
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,12288,1,0,1.2509
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,16384,1,0,1.6485
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,16,1,0,0.0952
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,32,1,0,0.0971
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,64,1,0,0.1033
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,32768,1,0,3.4473
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,128,1,0,0.1122
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,256,1,0,0.1339
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,512,1,0,0.1721
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1024,1,0,0.2559
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1536,1,0,0.3504
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,2048,1,0,0.4254
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,3072,1,0,0.6294
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,4096,1,0,0.8128
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,6144,1,0,1.1763
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,8192,1,0,1.5474
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,10240,1,0,1.9298
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,16,1,0,0.0970
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,12288,1,0,2.3976
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,32,1,0,0.1055
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,16384,1,0,3.1455
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,64,1,0,0.1084
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,128,1,0,0.1315
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,256,1,0,0.1667
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,512,1,0,0.2495
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1024,1,0,0.4209
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1536,1,0,0.6074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,32768,1,0,6.6780
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,2048,1,0,0.7892
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,3072,1,0,1.1358
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,4096,1,0,1.4906
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,6144,1,0,2.3039
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,8192,1,0,2.9977
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,16,1,0,0.1072
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,10240,1,0,3.8461
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,32,1,0,0.1114
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,12288,1,0,4.6076
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,64,1,0,0.1319
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,128,1,0,0.1647
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,256,1,0,0.2528
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,512,1,0,0.4158
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,16384,1,0,6.1931
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1024,1,0,0.7722
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1536,1,0,1.1175
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,2048,1,0,1.4666
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,3072,1,0,2.2570
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,4096,1,0,2.9193
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,16,1,0,0.1104
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,32,1,0,0.1318
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,6144,1,0,4.4635
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,64,1,0,0.1639
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,128,1,0,0.2431
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,8192,1,0,5.9920
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,256,1,0,0.4059
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,512,1,0,0.7697
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1024,1,0,1.4546
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1536,1,0,2.2340
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,2048,1,0,2.8739
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,16,1,0,0.1317
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,32,1,0,0.1648
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,64,1,0,0.2449
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,3072,1,0,4.3883
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,128,1,0,0.4030
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,4096,1,0,5.8536
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,256,1,0,0.7678
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,512,1,0,1.4525
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1024,1,0,2.8521
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,16,1,0,0.1661
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1536,1,0,4.3351
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,32,1,0,0.2439
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,64,1,0,0.4104
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,2048,1,0,5.7626
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,128,1,0,0.7640
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,256,1,0,1.4464
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,16,1,0,0.2493
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,32,1,0,0.4140
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,512,1,0,2.8410
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,64,1,0,0.7647
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1024,1,0,5.7274
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,128,1,0,1.4513
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,256,1,0,2.8356
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,16,1,0,0.1602
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,32,1,0,0.1692
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,64,1,0,0.1787
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,512,1,0,5.7354
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,128,1,0,0.1943
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,256,1,0,0.2332
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,512,1,0,0.3435
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,1024,1,0,0.5780
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,1536,1,0,0.8286
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,2048,1,0,1.1008
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,3072,1,0,1.6750
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,4096,1,0,2.3003
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,6144,1,0,3.5177
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,8192,1,0,4.9857
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,10240,1,0,6.4930
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,12288,1,0,8.4356
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,16,1,0,0.1677
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,32,1,0,0.1825
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,64,1,0,0.1996
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,16384,1,0,12.5118
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,128,1,0,0.2334
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,256,1,0,0.3367
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,512,1,0,0.5644
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,1024,1,0,1.0558
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,1536,1,0,1.5765
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,2048,1,0,2.1363
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,3072,1,0,3.1622
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,4096,1,0,4.3647
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,6144,1,0,7.0698
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,32768,1,0,33.9905
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,8192,1,0,10.0805
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,10240,1,0,13.2073
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,16,1,0,0.1865
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,32,1,0,0.2014
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,12288,1,0,16.5302
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,64,1,0,0.2401
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,128,1,0,0.3397
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,256,1,0,0.5574
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,512,1,0,1.0333
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,16384,1,0,24.3267
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,1024,1,0,2.0495
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,1536,1,0,2.9835
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,2048,1,0,4.0283
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,3072,1,0,6.3917
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,4096,1,0,8.9052
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,6144,1,0,13.8312
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,8192,1,0,19.5271
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,16,1,0,0.2123
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,32,1,0,0.2499
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,64,1,0,0.3482
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,128,1,0,0.5594
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,10240,1,0,26.2598
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,256,1,0,1.0174
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,512,1,0,2.0051
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,32768,1,0,67.4852
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,1024,1,0,3.8578
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,12288,1,0,33.3701
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,1536,1,0,6.0209
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,2048,1,0,8.2485
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,3072,1,0,12.4786
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,16384,1,0,48.3255
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,4096,1,0,17.1964
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,16,1,0,0.2657
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,32,1,0,0.3636
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,6144,1,0,28.0431
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,64,1,0,0.5779
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,128,1,0,1.0307
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,256,1,0,1.9808
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,8192,1,0,38.7826
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,512,1,0,3.7812
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,1024,1,0,7.9380
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,10240,1,0,51.4124
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,1536,1,0,11.7733
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,2048,1,0,15.9233
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,12288,1,0,63.0270
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,3072,1,0,25.3735
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,16,1,0,0.4022
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,32,1,0,0.6113
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,64,1,0,1.0642
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,4096,1,0,34.1262
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,128,1,0,2.0056
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,32768,1,0,131.7542
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,256,1,0,3.7421
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,512,1,0,7.7699
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,16384,1,0,93.4653
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,1024,1,0,15.3021
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,6144,1,0,52.3317
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,16,1,0,0.6885
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,1536,1,0,23.9416
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,32,1,0,1.1343
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,64,1,0,2.0716
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,128,1,0,3.7903
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,2048,1,0,31.6565
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,8192,1,0,74.4517
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,256,1,0,7.6819
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,512,1,0,14.9529
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,128,16,1,0,1.2871
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,128,32,1,0,2.2083
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,3072,1,0,47.0799
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,128,64,1,0,3.9460
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,1024,1,0,30.3386
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,128,128,1,0,7.7769
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,128,256,1,0,14.7872
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,4096,1,0,65.1204
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,256,16,1,0,2.5139
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,1536,1,0,44.2790
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,256,32,1,0,4.1925
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,256,64,1,0,8.0755
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,128,512,1,0,29.6801
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,2048,1,0,60.2105
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,16,1,0,0.1302
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,256,128,1,0,14.9726
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,32,1,0,0.1325
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,64,1,0,0.1381
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,128,1,0,0.1516
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,256,1,0,0.1767
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,512,1,0,0.2369
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,1024,1,0,0.3761
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,2048,1,0,0.6732
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,256,256,1,0,29.3477
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,1536,1,0,0.5099
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,3072,1,0,0.9476
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,128,1024,1,0,57.6418
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,4096,1,0,1.2792
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,6144,1,0,2.0524
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,8192,1,0,2.8724
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,10240,1,0,3.7338
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,12288,1,0,4.6454
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,16,1,0,0.1340
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,32,1,0,0.1385
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,16384,1,0,6.7470
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,128,1,0,0.1745
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,64,1,0,0.1523
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,512,1,0,0.3650
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,256,1,0,0.2321
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,1024,1,0,0.6453
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,1536,1,0,0.8928
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,2048,1,0,1.1860
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,3072,1,0,1.8561
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,32768,1,0,18.2763
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,256,512,1,0,56.3186
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,4096,1,0,2.5471
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,6144,1,0,3.9329
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,8192,1,0,5.5064
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,10240,1,0,7.1807
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,16,1,0,0.1435
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,32,1,0,0.1542
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,12288,1,0,9.0664
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,64,1,0,0.1757
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,16384,1,0,13.4415
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,128,1,0,0.2351
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,256,1,0,0.3601
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,512,1,0,0.6305
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,1024,1,0,1.1430
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,1536,1,0,1.7677
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,2048,1,0,2.3794
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,3072,1,0,3.5884
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,4096,1,0,4.8901
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,6144,1,0,7.6989
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,8192,1,0,10.9893
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,32768,1,0,36.1701
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,10240,1,0,14.4322
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,16,1,0,0.1583
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,32,1,0,0.1830
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,12288,1,0,17.9423
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,64,1,0,0.2412
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,128,1,0,0.3640
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,256,1,0,0.6254
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,512,1,0,1.1184
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,1024,1,0,2.2926
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,1536,1,0,3.4021
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,16384,1,0,26.5542
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,2048,1,0,4.5792
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,3072,1,0,6.9948
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,4096,1,0,9.8106
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,6144,1,0,15.2311
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,16,1,0,0.1906
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,32,1,0,0.2489
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,64,1,0,0.3698
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,8192,1,0,21.7732
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,128,1,0,0.6277
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,256,1,0,1.1043
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,512,1,0,2.2424
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,10240,1,0,28.7946
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,1024,1,0,4.4047
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,1536,1,0,6.6509
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,32768,1,0,72.1660
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,2048,1,0,9.1851
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,12288,1,0,36.2315
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,3072,1,0,13.8849
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,16,1,0,0.2670
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,32,1,0,0.3883
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,4096,1,0,19.4033
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,64,1,0,0.6456
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,128,1,0,1.1133
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,256,1,0,2.2227
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,16384,1,0,53.0126
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,512,1,0,4.3214
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,6144,1,0,30.9089
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,1024,1,0,8.8625
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,1536,1,0,13.1806
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,16,1,0,0.4253
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,32,1,0,0.6784
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,2048,1,0,18.1425
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,64,1,0,1.1480
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,128,1,0,2.2470
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,8192,1,0,43.4959
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,256,1,0,4.2823
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,3072,1,0,28.2463
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,512,1,0,8.6788
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,128,16,1,0,0.7556
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,128,32,1,0,1.2158
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,128,64,1,0,2.3180
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,4096,1,0,38.8581
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,1024,1,0,17.5087
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,128,128,1,0,4.3169
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,128,256,1,0,8.6057
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,1536,1,0,26.8374
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,256,16,1,0,1.3774
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,256,32,1,0,2.4515
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,2048,1,0,36.3167
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,128,512,1,0,17.1922
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,256,64,1,0,4.4714
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,256,128,1,0,8.6898
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,16,1,0,0.1131
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,32,1,0,0.1158
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,64,1,0,0.1218
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,128,1,0,0.1285
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,256,1,0,0.1404
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,512,1,0,0.1780
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,256,256,1,0,17.0194
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,1536,1,0,0.3272
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,1024,1,0,0.2501
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,128,1024,1,0,35.1187
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,3072,1,0,0.5708
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,4096,1,0,0.7560
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,6144,1,0,1.1913
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,2048,1,0,0.4117
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,8192,1,0,1.6230
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,10240,1,0,2.1153
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,12288,1,0,2.6435
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,16,1,0,0.1191
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,16384,1,0,3.7891
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,32,1,0,0.1234
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,256,512,1,0,34.3735
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,64,1,0,0.1276
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,128,1,0,0.1391
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,256,1,0,0.1767
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,512,1,0,0.2437
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,32768,1,0,9.8138
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,1024,1,0,0.3958
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,1536,1,0,0.5393
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,2048,1,0,0.7017
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,3072,1,0,1.0788
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,4096,1,0,1.4575
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,6144,1,0,2.2564
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,8192,1,0,3.1370
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,10240,1,0,4.0505
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,16,1,0,0.1240
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,12288,1,0,5.0744
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,32,1,0,0.1296
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,64,1,0,0.1422
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,16384,1,0,7.3145
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,128,1,0,0.1741
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,256,1,0,0.2405
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,512,1,0,0.3856
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,1024,1,0,0.6719
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,1536,1,0,1.0298
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,2048,1,0,1.3635
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,3072,1,0,2.0590
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,4096,1,0,2.8072
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,32768,1,0,19.5166
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,6144,1,0,4.3687
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,8192,1,0,6.0781
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,10240,1,0,7.9435
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,16,1,0,0.1316
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,12288,1,0,10.0221
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,32,1,0,0.1437
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,64,1,0,0.1766
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,128,1,0,0.2423
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,256,1,0,0.3814
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,512,1,0,0.6622
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,16384,1,0,14.6139
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,1024,1,0,1.3130
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,1536,1,0,1.9677
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,2048,1,0,2.6433
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,3072,1,0,4.0162
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,4096,1,0,5.4666
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,6144,1,0,8.6403
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,16,1,0,0.1489
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,32,1,0,0.1817
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,8192,1,0,12.1969
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,64,1,0,0.2473
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,32768,1,0,38.7021
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,128,1,0,0.3852
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,10240,1,0,16.0139
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,256,1,0,0.6502
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,512,1,0,1.2973
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,12288,1,0,19.8009
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,1024,1,0,2.5642
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,1536,1,0,3.8254
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,2048,1,0,5.1387
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,16384,1,0,29.0975
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,3072,1,0,7.9736
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,16,1,0,0.1889
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,4096,1,0,10.9874
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,32,1,0,0.2545
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,64,1,0,0.3916
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,128,1,0,0.6587
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,256,1,0,1.2772
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,6144,1,0,17.0818
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,512,1,0,2.5144
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,1024,1,0,4.9736
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,1536,1,0,7.6030
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,16,1,0,0.2737
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,8192,1,0,24.3033
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,32,1,0,0.4087
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,2048,1,0,10.3827
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,64,1,0,0.6726
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,128,1,0,1.2824
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,256,1,0,2.4871
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,3072,1,0,15.7937
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,512,1,0,4.8972
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,4096,1,0,21.9290
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,128,16,1,0,0.4455
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,128,32,1,0,0.7130
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,1024,1,0,10.0606
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,128,64,1,0,1.3266
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,1536,1,0,15.0287
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,128,128,1,0,2.5245
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,128,256,1,0,4.8467
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,2048,1,0,20.6795
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,256,16,1,0,0.7831
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,128,512,1,0,9.8818
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,256,32,1,0,1.3856
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,256,64,1,0,2.6002
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,256,128,1,0,4.8912
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,16,1,0,0.0971
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,64,1,0,0.1091
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,128,1,0,0.1155
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,32,1,0,0.1033
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,256,256,1,0,9.7884
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,128,1024,1,0,20.0974
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,256,1,0,0.1266
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,512,1,0,0.1482
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,1024,1,0,0.1957
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,2048,1,0,0.2786
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,1536,1,0,0.2383
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,3072,1,0,0.3891
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,4096,1,0,0.5001
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,6144,1,0,0.7585
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,8192,1,0,1.0107
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,10240,1,0,1.3070
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,12288,1,0,1.5994
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,16384,1,0,2.2767
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,16,1,0,0.1036
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,256,512,1,0,19.8140
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,32,1,0,0.1114
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,32768,1,0,5.6716
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,64,1,0,0.1136
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,128,1,0,0.1238
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,256,1,0,0.1461
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,512,1,0,0.1900
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,1536,1,0,0.3671
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,2048,1,0,0.4625
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,3072,1,0,0.6874
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,1024,1,0,0.2696
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,4096,1,0,0.9104
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,6144,1,0,1.3933
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,8192,1,0,1.9230
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,10240,1,0,2.4742
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,12288,1,0,3.0731
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,16,1,0,0.1095
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,32,1,0,0.1138
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,64,1,0,0.1238
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,16384,1,0,4.3755
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,128,1,0,0.1441
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,256,1,0,0.1860
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,512,1,0,0.2644
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,1024,1,0,0.4484
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,1536,1,0,0.6559
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,2048,1,0,0.8538
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,32768,1,0,11.0214
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,3072,1,0,1.2920
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,4096,1,0,1.7511
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,6144,1,0,2.7019
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,8192,1,0,3.7220
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,10240,1,0,4.8194
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,16,1,0,0.1133
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,12288,1,0,6.0520
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,64,1,0,0.1441
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,32,1,0,0.1260
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,128,1,0,0.1851
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,16384,1,0,8.5459
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,256,1,0,0.2595
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,512,1,0,0.4387
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,1024,1,0,0.8345
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,1536,1,0,1.2329
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,2048,1,0,1.6673
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,3072,1,0,2.5129
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,4096,1,0,3.4026
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,32768,1,0,22.0093
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,6144,1,0,5.3473
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,8192,1,0,7.2936
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,16,1,0,0.1257
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,32,1,0,0.1460
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,10240,1,0,9.5567
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,64,1,0,0.1874
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,128,1,0,0.2631
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,12288,1,0,11.8936
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,256,1,0,0.4352
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,512,1,0,0.8137
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,1024,1,0,1.6096
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,1536,1,0,2.4185
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,2048,1,0,3.2340
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,16384,1,0,17.2033
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,3072,1,0,4.9936
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,16,1,0,0.1509
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,4096,1,0,6.6769
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,32,1,0,0.1928
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,64,1,0,0.2691
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,128,1,0,0.4406
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,256,1,0,0.8144
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,6144,1,0,10.5010
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,512,1,0,1.5817
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,1024,1,0,3.1490
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,8192,1,0,14.7423
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,1536,1,0,4.7996
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,16,1,0,0.2016
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,2048,1,0,6.3682
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,32,1,0,0.2751
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,64,1,0,0.4432
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,3072,1,0,9.8249
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,128,1,0,0.8068
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,256,1,0,1.5807
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,512,1,0,3.1009
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,4096,1,0,13.5476
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,128,16,1,0,0.2934
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,128,32,1,0,0.4691
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,1024,1,0,6.2043
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,128,64,1,0,0.8291
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,1536,1,0,9.4804
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,128,128,1,0,1.5834
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,128,256,1,0,3.0823
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,2048,1,0,12.9123
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,256,16,1,0,0.4971
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,128,512,1,0,6.1122
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,256,32,1,0,0.8594
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,256,64,1,0,1.6204
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,256,128,1,0,3.1071
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,128,1024,1,0,12.5955
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,16,1,0,0.0936
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,32,1,0,0.1023
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,64,1,0,0.1056
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,256,256,1,0,6.0716
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,256,1,0,0.1214
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,128,1,0,0.1114
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,512,1,0,0.1355
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,1024,1,0,0.1683
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,1536,1,0,0.1992
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,2048,1,0,0.2305
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,256,512,1,0,12.4401
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,3072,1,0,0.3015
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,4096,1,0,0.3755
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,6144,1,0,0.5404
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,8192,1,0,0.7224
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,10240,1,0,0.9093
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,12288,1,0,1.0984
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,16384,1,0,1.5288
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,16,1,0,0.1017
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,32,1,0,0.1050
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,64,1,0,0.1096
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,128,1,0,0.1174
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,32768,1,0,3.5828
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,256,1,0,0.1337
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,512,1,0,0.1626
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,1024,1,0,0.2200
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,1536,1,0,0.2850
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,2048,1,0,0.3471
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,3072,1,0,0.5090
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,4096,1,0,0.6581
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,6144,1,0,0.9764
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,8192,1,0,1.3207
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,10240,1,0,1.6761
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,12288,1,0,2.0641
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,16,1,0,0.1051
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,16384,1,0,2.8860
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,32,1,0,0.1112
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,64,1,0,0.1177
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,128,1,0,0.1278
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,32768,1,0,6.9107
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,256,1,0,0.1601
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,512,1,0,0.2153
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,1024,1,0,0.3384
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,1536,1,0,0.4779
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,2048,1,0,0.6135
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,3072,1,0,0.9099
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,4096,1,0,1.2260
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,6144,1,0,1.8547
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,8192,1,0,2.5308
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,16,1,0,0.1113
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,10240,1,0,3.2545
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,32,1,0,0.1174
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,12288,1,0,4.0701
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,16384,1,0,5.6117
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,128,1,0,0.1586
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,64,1,0,0.1310
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,256,1,0,0.2135
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,512,1,0,0.3314
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,1024,1,0,0.5991
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,1536,1,0,0.8827
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,2048,1,0,1.1702
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,3072,1,0,1.7492
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,32768,1,0,13.6326
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,4096,1,0,2.3600
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,6144,1,0,3.6916
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,8192,1,0,4.9623
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,16,1,0,0.1175
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,32,1,0,0.1275
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,10240,1,0,6.4344
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,64,1,0,0.1582
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,12288,1,0,7.9327
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,256,1,0,0.3305
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,512,1,0,0.5976
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,128,1,0,0.2096
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,16384,1,0,11.1357
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,1024,1,0,1.1426
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,1536,1,0,1.6950
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,2048,1,0,2.2640
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,3072,1,0,3.5027
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,16,1,0,0.1310
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,32,1,0,0.1607
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,64,1,0,0.2130
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,4096,1,0,4.6433
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,128,1,0,0.3346
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,6144,1,0,7.2211
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,256,1,0,0.5876
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,512,1,0,1.1272
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,1024,1,0,2.2170
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,8192,1,0,9.8733
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,1536,1,0,3.4075
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,16,1,0,0.1666
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,2048,1,0,4.4687
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,32,1,0,0.2188
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,64,1,0,0.3459
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,3072,1,0,6.8703
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,128,1,0,0.5892
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,256,1,0,1.1177
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,512,1,0,2.1944
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,4096,1,0,9.2782
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,1024,1,0,4.3898
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,128,16,1,0,0.2261
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,128,32,1,0,0.3449
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,128,64,1,0,0.5968
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,1536,1,0,6.6756
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,128,128,1,0,1.1203
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,2048,1,0,8.9337
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,128,256,1,0,2.1821
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,128,512,1,0,4.3411
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,256,16,1,0,0.3650
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,256,32,1,0,0.6195
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,256,64,1,0,1.1450
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,256,128,1,0,2.1919
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,128,1024,1,0,8.7748
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,16,1,0,0.0933
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,32,1,0,0.1018
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,256,256,1,0,4.3163
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,64,1,0,0.1036
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,128,1,0,0.1095
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,256,1,0,0.1186
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,512,1,0,0.1277
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,1024,1,0,0.1560
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,256,512,1,0,8.7078
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,1536,1,0,0.1788
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,2048,1,0,0.2076
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,3072,1,0,0.2647
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,4096,1,0,0.3268
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,6144,1,0,0.4503
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,8192,1,0,0.5778
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,10240,1,0,0.7030
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,12288,1,0,0.8630
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,16384,1,0,1.1771
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,16,1,0,0.0972
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,32,1,0,0.1060
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,32768,1,0,2.5537
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,128,1,0,0.1152
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,64,1,0,0.1073
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,256,1,0,0.1283
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,512,1,0,0.1524
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,1024,1,0,0.1954
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,1536,1,0,0.2469
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,2048,1,0,0.3075
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,3072,1,0,0.4176
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,4096,1,0,0.5394
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,6144,1,0,0.7787
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,8192,1,0,1.0477
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,10240,1,0,1.3040
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,12288,1,0,1.5739
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,16384,1,0,2.1549
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,16,1,0,0.1058
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,32,1,0,0.1071
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,64,1,0,0.1162
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,128,1,0,0.1235
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,32768,1,0,4.8636
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,256,1,0,0.1483
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,512,1,0,0.1916
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,1024,1,0,0.2958
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,1536,1,0,0.4061
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,2048,1,0,0.5169
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,3072,1,0,0.7372
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,4096,1,0,0.9800
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,6144,1,0,1.4500
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,8192,1,0,1.9481
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,10240,1,0,2.4807
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,16,1,0,0.1092
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,12288,1,0,3.0920
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,32,1,0,0.1155
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,16384,1,0,4.1674
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,64,1,0,0.1238
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,256,1,0,0.1888
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,512,1,0,0.2927
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,1024,1,0,0.5028
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,128,1,0,0.1459
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,32768,1,0,9.5393
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,1536,1,0,0.7249
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,2048,1,0,0.9504
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,3072,1,0,1.3879
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,4096,1,0,1.8462
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,6144,1,0,2.8833
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,16,1,0,0.1120
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,32,1,0,0.1237
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,8192,1,0,3.8088
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,10240,1,0,4.9198
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,64,1,0,0.1461
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,12288,1,0,5.9976
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,128,1,0,0.1858
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,256,1,0,0.2880
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,16384,1,0,8.2508
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,512,1,0,0.4991
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,1024,1,0,0.9312
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,1536,1,0,1.3528
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,2048,1,0,1.7945
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,3072,1,0,2.7726
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,4096,1,0,3.6350
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,16,1,0,0.1235
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,32,1,0,0.1461
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,64,1,0,0.1848
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,128,1,0,0.2878
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,6144,1,0,5.6275
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,256,1,0,0.4910
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,512,1,0,0.9205
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,8192,1,0,7.5989
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,1024,1,0,1.7676
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,1536,1,0,2.7189
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,2048,1,0,3.5412
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,16,1,0,0.1500
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,32,1,0,0.1871
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,3072,1,0,5.4361
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,64,1,0,0.2870
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,128,1,0,0.4910
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,4096,1,0,7.2793
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,256,1,0,0.9140
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,512,1,0,1.7499
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,1024,1,0,3.4970
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,128,32,1,0,0.2918
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,128,16,1,0,0.1931
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,1536,1,0,5.3386
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,128,64,1,0,0.4925
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,128,128,1,0,0.9226
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,2048,1,0,7.1361
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,128,256,1,0,1.7483
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,256,16,1,0,0.2999
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,256,32,1,0,0.5015
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,128,512,1,0,3.4677
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,256,64,1,0,0.9280
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,256,128,1,0,1.7500
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,128,1024,1,0,7.0258
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,16,1,0,0.0911
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,32,1,0,0.0950
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,256,256,1,0,3.4590
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,64,1,0,0.1034
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,128,1,0,0.1079
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,256,1,0,0.1178
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,512,1,0,0.1256
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,256,512,1,0,7.0082
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,1024,1,0,0.1531
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,2048,1,0,0.1973
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,3072,1,0,0.2548
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,4096,1,0,0.3123
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,6144,1,0,0.4158
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,1536,1,0,0.1729
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,8192,1,0,0.5308
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,10240,1,0,0.6360
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,12288,1,0,0.7712
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,16384,1,0,0.9880
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,16,1,0,0.1003
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,32,1,0,0.1054
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,32768,1,0,2.0690
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,64,1,0,0.1074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,128,1,0,0.1154
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,256,1,0,0.1217
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,512,1,0,0.1466
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,1024,1,0,0.1885
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,1536,1,0,0.2364
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,2048,1,0,0.2852
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,3072,1,0,0.3845
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,4096,1,0,0.5026
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,6144,1,0,0.6883
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,8192,1,0,0.9052
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,10240,1,0,1.1104
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,12288,1,0,1.3463
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,16384,1,0,1.8158
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,16,1,0,0.1040
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,32,1,0,0.1093
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,32768,1,0,3.8703
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,64,1,0,0.1129
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,128,1,0,0.1197
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,256,1,0,0.1441
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,512,1,0,0.1830
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,1024,1,0,0.2776
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,1536,1,0,0.3743
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,2048,1,0,0.4626
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,3072,1,0,0.6615
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,4096,1,0,0.8606
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,6144,1,0,1.2672
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,8192,1,0,1.6897
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,10240,1,0,2.1317
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,12288,1,0,2.6239
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,16,1,0,0.1071
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,16384,1,0,3.4626
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,32,1,0,0.1133
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,64,1,0,0.1197
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,128,1,0,0.1429
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,256,1,0,0.1795
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,32768,1,0,7.5915
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,512,1,0,0.2670
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,1024,1,0,0.4525
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,1536,1,0,0.6504
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,2048,1,0,0.8384
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,3072,1,0,1.2273
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,4096,1,0,1.6239
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,6144,1,0,2.4980
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,8192,1,0,3.2631
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,16,1,0,0.1132
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,10240,1,0,4.1972
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,32,1,0,0.1207
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,12288,1,0,5.0784
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,64,1,0,0.1419
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,128,1,0,0.1768
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,256,1,0,0.2645
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,512,1,0,0.4506
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,1024,1,0,0.8262
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,16384,1,0,6.8703
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,1536,1,0,1.2079
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,2048,1,0,1.5888
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,3072,1,0,2.4377
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,16,1,0,0.1211
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,4096,1,0,3.1583
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,32,1,0,0.1421
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,6144,1,0,4.8589
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,64,1,0,0.1767
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,128,1,0,0.2624
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,8192,1,0,6.5348
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,256,1,0,0.4569
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,512,1,0,0.8202
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,1024,1,0,1.5715
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,1536,1,0,2.4044
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,2048,1,0,3.1021
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,16,1,0,0.1422
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,3072,1,0,4.7861
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,32,1,0,0.1768
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,4096,1,0,6.3407
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,64,1,0,0.2627
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,128,1,0,0.4471
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,256,1,0,0.8187
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,512,1,0,1.5663
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,1024,1,0,3.0771
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,128,16,1,0,0.1800
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,128,32,1,0,0.2650
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,128,64,1,0,0.4477
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,1536,1,0,4.7138
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,2048,1,0,6.2451
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,128,128,1,0,0.8216
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,128,256,1,0,1.5566
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,128,512,1,0,3.0598
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,256,16,1,0,0.2735
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,256,32,1,0,0.4563
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,256,64,1,0,0.8257
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,128,1024,1,0,6.2023
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,256,128,1,0,1.5591
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,16,1,0,0.0882
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,256,256,1,0,3.0548
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,32,1,0,0.0928
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,64,1,0,0.0969
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,128,1,0,0.1054
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,256,1,0,0.1134
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,512,1,0,0.1214
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,256,512,1,0,6.1683
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,1024,1,0,0.1482
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,1536,1,0,0.1684
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,2048,1,0,0.1924
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,3072,1,0,0.2463
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,4096,1,0,0.3005
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,6144,1,0,0.4012
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,8192,1,0,0.4995
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,10240,1,0,0.6106
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,12288,1,0,0.7365
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,16384,1,0,0.9480
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,16,1,0,0.0930
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,64,1,0,0.1029
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,128,1,0,0.1147
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,32768,1,0,1.8093
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,256,1,0,0.1176
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,512,1,0,0.1439
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,32,1,0,0.0969
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,1024,1,0,0.1808
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,1536,1,0,0.2302
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,2048,1,0,0.2808
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,3072,1,0,0.3694
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,4096,1,0,0.4547
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,6144,1,0,0.6714
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,8192,1,0,0.8606
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,10240,1,0,1.0406
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,12288,1,0,1.2447
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,16384,1,0,1.6369
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,16,1,0,0.0999
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,32,1,0,0.1032
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,64,1,0,0.1091
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,32768,1,0,3.3802
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,128,1,0,0.1155
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,256,1,0,0.1406
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,512,1,0,0.1755
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,1024,1,0,0.2658
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,1536,1,0,0.3464
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,2048,1,0,0.4314
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,3072,1,0,0.6307
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,4096,1,0,0.8199
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,6144,1,0,1.1785
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,8192,1,0,1.5492
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,10240,1,0,1.9402
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,12288,1,0,2.4057
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,16384,1,0,3.1318
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,16,1,0,0.1057
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,32,1,0,0.1094
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,64,1,0,0.1153
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,128,1,0,0.1380
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,256,1,0,0.1726
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,512,1,0,0.2574
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,32768,1,0,6.5915
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,1024,1,0,0.4217
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,1536,1,0,0.6155
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,2048,1,0,0.7990
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,3072,1,0,1.1453
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,4096,1,0,1.5099
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,6144,1,0,2.3198
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,8192,1,0,3.0007
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,16,1,0,0.1133
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,32,1,0,0.1176
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,10240,1,0,3.8609
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,12288,1,0,4.6105
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,64,1,0,0.1374
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,128,1,0,0.1715
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,256,1,0,0.2551
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,16384,1,0,6.1817
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,512,1,0,0.4178
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,1024,1,0,0.7872
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,1536,1,0,1.1285
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,2048,1,0,1.4871
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,3072,1,0,2.2781
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,4096,1,0,2.9355
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,16,1,0,0.1152
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,32,1,0,0.1383
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,64,1,0,0.1712
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,6144,1,0,4.5076
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,128,1,0,0.2546
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,8192,1,0,5.9809
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,256,1,0,0.4142
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,512,1,0,0.7808
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,1024,1,0,1.4764
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,1536,1,0,2.2612
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,2048,1,0,2.9072
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,16,1,0,0.1380
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,32,1,0,0.1707
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,64,1,0,0.2513
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,128,1,0,0.4160
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,3072,1,0,4.4288
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,4096,1,0,5.8773
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,256,1,0,0.7767
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,512,1,0,1.4676
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,1024,1,0,2.8886
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,128,16,1,0,0.1721
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,1536,1,0,4.4112
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,128,32,1,0,0.2492
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,2048,1,0,5.8391
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,128,64,1,0,0.4138
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,128,128,1,0,0.7787
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,128,256,1,0,1.4708
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,128,512,1,0,2.8772
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,256,16,1,0,0.2528
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,256,32,1,0,0.4203
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,256,64,1,0,0.7806
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,256,128,1,0,1.4673
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,128,1024,1,0,5.7964
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,256,256,1,0,2.8736
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,256,512,1,0,5.7782
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,512,1,0,0.3043
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1536,1,0,0.6899
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1024,1,0,0.4904
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,2048,1,0,0.9129
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,3072,1,0,1.4277
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,4096,1,0,1.9409
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,6144,1,0,3.1374
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,8192,1,0,4.4803
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,10240,1,0,6.0095
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,16,1,0,0.1647
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,128,1,0,0.1932
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,64,1,0,0.1738
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,32,1,0,0.1689
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,256,1,0,0.2342
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,32,1,0,0.1768
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,16,1,0,0.1671
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,64,1,0,0.1985
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,128,1,0,0.2362
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,256,1,0,0.2993
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,512,1,0,0.4702
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1024,1,0,0.8566
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1536,1,0,1.3047
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,2048,1,0,1.7288
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,3072,1,0,2.6885
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,4096,1,0,3.7158
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,12288,1,0,7.6733
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,16384,1,0,11.6606
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,16,1,0,0.1824
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,16384,1,0,23.0751
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,32,1,0,0.2012
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,64,1,0,0.2414
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,32768,1,0,34.0641
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,128,1,0,0.2992
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,256,1,0,0.4625
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,512,1,0,0.8256
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1024,1,0,1.6170
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1536,1,0,2.4483
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,2048,1,0,3.3057
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,6144,1,0,6.0819
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,8192,1,0,8.9899
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,3072,1,0,5.2005
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,10240,1,0,12.0737
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,12288,1,0,15.1414
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,4096,1,0,7.4971
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,6144,1,0,11.9858
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,32768,1,0,67.7352
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,8192,1,0,17.8035
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,16,1,0,0.2120
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,32,1,0,0.2493
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,64,1,0,0.3082
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,128,1,0,0.4637
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,10240,1,0,23.9147
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,256,1,0,0.8121
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,512,1,0,1.5657
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1024,1,0,3.0971
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,12288,1,0,30.6141
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1536,1,0,4.7481
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,2048,1,0,6.6963
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,3072,1,0,10.2748
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,16384,1,0,45.8779
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,4096,1,0,14.8567
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,16,1,0,0.2686
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,6144,1,0,24.4085
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,32,1,0,0.3264
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,64,1,0,0.4814
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,128,1,0,0.8119
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,256,1,0,1.5424
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,8192,1,0,35.3423
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,512,1,0,2.9966
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1024,1,0,6.2904
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1536,1,0,9.3670
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,10240,1,0,47.5528
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,2048,1,0,13.2757
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,12288,1,0,58.7723
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,3072,1,0,20.9776
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,16,1,0,0.3633
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,32,1,0,0.5192
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,64,1,0,0.8496
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,128,1,0,1.5429
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,4096,1,0,29.5157
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,256,1,0,2.9528
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,512,1,0,6.0766
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,32768,1,0,131.9938
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1024,1,0,12.4741
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,16384,1,0,88.4141
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,6144,1,0,46.3590
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1536,1,0,19.2056
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,16,1,0,0.5971
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,32,1,0,0.9257
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,64,1,0,1.6178
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,2048,1,0,26.3513
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,128,1,0,2.9584
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,256,1,0,6.0032
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,8192,1,0,67.5410
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,512,1,0,12.0293
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,3072,1,0,39.5958
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,16,1,0,1.0775
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,32,1,0,1.7679
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,64,1,0,3.1085
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1024,1,0,24.7310
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,128,1,0,6.0164
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,256,1,0,11.8950
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1536,1,0,36.0375
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,16,1,0,2.0751
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,4096,1,0,55.9225
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,32,1,0,3.4116
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,512,1,0,23.8851
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,64,1,0,6.3285
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,2048,1,0,49.5738
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,128,1,0,11.9198
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,256,1,0,23.6447
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1024,1,0,46.3563
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,512,1,0,44.7477
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,512,1,0,0.2321
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,2048,1,0,0.5935
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1536,1,0,0.4632
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,3072,1,0,0.8772
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,4096,1,0,1.1790
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,6144,1,0,1.8881
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1024,1,0,0.3428
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,8192,1,0,2.6605
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,10240,1,0,3.5167
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,12288,1,0,4.4510
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,16384,1,0,6.5285
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,32768,1,0,18.5873
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,16,1,0,0.1470
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,32,1,0,0.1495
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,64,1,0,0.1521
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,256,1,0,0.2311
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,512,1,0,0.3324
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,128,1,0,0.1624
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1024,1,0,0.5582
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1536,1,0,0.8093
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,2048,1,0,1.0646
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,3072,1,0,1.6547
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,256,1,0,0.1899
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,4096,1,0,2.2665
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,6144,1,0,3.6245
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,8192,1,0,5.1615
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,10240,1,0,6.8479
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,16,1,0,0.1467
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,32,1,0,0.1517
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,12288,1,0,8.6860
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,16,1,0,0.1544
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,16384,1,0,13.0330
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,32,1,0,0.1667
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,64,1,0,0.1908
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,128,1,0,0.2313
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,256,1,0,0.3247
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,512,1,0,0.5396
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1024,1,0,1.0045
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,64,1,0,0.1620
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1536,1,0,1.5245
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,2048,1,0,2.0531
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,3072,1,0,3.1716
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,4096,1,0,4.4017
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,128,1,0,0.1882
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,6144,1,0,7.0925
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,32768,1,0,36.7712
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,8192,1,0,10.3341
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,16,1,0,0.1691
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,32,1,0,0.1964
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,10240,1,0,13.7249
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,64,1,0,0.2364
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,128,1,0,0.3293
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,256,1,0,0.5312
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,12288,1,0,17.1488
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,512,1,0,0.9757
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1024,1,0,1.9413
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1536,1,0,2.9375
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,2048,1,0,3.9929
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,16384,1,0,25.7236
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,3072,1,0,6.2087
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,4096,1,0,8.8392
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,6144,1,0,14.0151
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,16,1,0,0.2058
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,32,1,0,0.2446
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,64,1,0,0.3353
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,128,1,0,0.5309
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,256,1,0,0.9604
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,8192,1,0,20.4825
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,512,1,0,1.8821
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1024,1,0,3.7814
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,10240,1,0,27.2111
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1536,1,0,5.7536
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,2048,1,0,8.0456
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,12288,1,0,34.5978
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,3072,1,0,12.2694
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,16,1,0,0.2652
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,32768,1,0,73.1142
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,32,1,0,0.3534
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,64,1,0,0.5492
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,4096,1,0,17.5330
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,128,1,0,0.9619
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,16384,1,0,51.1311
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,256,1,0,1.8634
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,512,1,0,3.6712
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,6144,1,0,28.3733
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1024,1,0,7.6496
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1536,1,0,11.3931
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,16,1,0,0.3913
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,32,1,0,0.5871
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,64,1,0,0.9981
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,2048,1,0,15.9381
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,8192,1,0,40.6443
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,128,1,0,1.8673
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,256,1,0,3.6289
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,512,1,0,7.4351
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,3072,1,0,24.9378
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,16,1,0,0.6638
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,32,1,0,1.0740
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1024,1,0,15.1134
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,64,1,0,1.9399
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,128,1,0,3.6407
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,4096,1,0,34.8259
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,256,1,0,7.3459
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1536,1,0,23.2200
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,16,1,0,1.2262
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,32,1,0,2.0888
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,64,1,0,3.7864
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,512,1,0,14.7019
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,2048,1,0,31.6422
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,128,1,0,7.3563
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,256,1,0,14.5718
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1024,1,0,30.0983
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,512,1,0,29.2399
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1024,1,0,0.2532
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1536,1,0,0.3288
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,2048,1,0,0.4087
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,4096,1,0,0.7414
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,512,1,0,0.1913
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,16,1,0,0.1237
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,32,1,0,0.1304
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,64,1,0,0.1300
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,128,1,0,0.1449
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,3072,1,0,0.5718
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,256,1,0,0.1616
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,16,1,0,0.1299
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,32,1,0,0.1298
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,6144,1,0,1.1372
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,8192,1,0,1.5800
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,128,1,0,0.1558
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,64,1,0,0.1421
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,256,1,0,0.1871
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,512,1,0,0.2465
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1024,1,0,0.3868
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1536,1,0,0.5295
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,2048,1,0,0.6750
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,3072,1,0,1.0044
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,4096,1,0,1.3629
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,6144,1,0,2.1784
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,8192,1,0,3.0604
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,10240,1,0,4.0118
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,12288,1,0,5.0432
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,16384,1,0,7.3276
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,16,1,0,0.1306
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,32,1,0,0.1420
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,64,1,0,0.1599
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,10240,1,0,2.0748
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,128,1,0,0.1875
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,256,1,0,0.2419
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,512,1,0,0.3751
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1024,1,0,0.6389
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,12288,1,0,2.6119
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,32768,1,0,20.1730
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1536,1,0,0.9325
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,16384,1,0,3.7802
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,2048,1,0,1.2463
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,3072,1,0,1.9344
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,4096,1,0,2.6627
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,32768,1,0,10.1740
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,6144,1,0,4.2199
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,8192,1,0,5.9530
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,10240,1,0,7.8341
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,16,1,0,0.1468
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,32,1,0,0.1592
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,64,1,0,0.1872
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,12288,1,0,9.8609
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,128,1,0,0.2444
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,256,1,0,0.3685
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,512,1,0,0.6205
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1024,1,0,1.1845
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,16384,1,0,14.5614
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1536,1,0,1.8098
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,2048,1,0,2.4482
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,3072,1,0,3.7679
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,4096,1,0,5.1878
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,6144,1,0,8.2485
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,16,1,0,0.1671
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,32,1,0,0.1937
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,8192,1,0,11.9117
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,64,1,0,0.2513
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,128,1,0,0.3700
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,256,1,0,0.6131
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,10240,1,0,15.6661
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,32768,1,0,39.8223
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,512,1,0,1.1541
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1024,1,0,2.3386
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,12288,1,0,19.4598
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1536,1,0,3.5288
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,2048,1,0,4.7800
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,16,1,0,0.2026
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,3072,1,0,7.3863
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,32,1,0,0.2578
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,16384,1,0,28.8355
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,64,1,0,0.3788
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,4096,1,0,10.3936
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,128,1,0,0.6113
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,256,1,0,1.1414
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,6144,1,0,16.2894
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,512,1,0,2.2805
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1024,1,0,4.5746
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1536,1,0,6.9488
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,16,1,0,0.2789
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,8192,1,0,23.5462
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,2048,1,0,9.6125
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,32,1,0,0.3981
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,64,1,0,0.6288
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,128,1,0,1.1426
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,3072,1,0,14.5837
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,256,1,0,2.2590
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,512,1,0,4.4584
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,4096,1,0,20.5977
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,16,1,0,0.4339
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,32,1,0,0.6678
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1024,1,0,9.1911
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,64,1,0,1.1791
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1536,1,0,13.6749
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,128,1,0,2.2594
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,256,1,0,4.4107
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,2048,1,0,19.0384
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,16,1,0,0.7457
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,32,1,0,1.2541
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,512,1,0,8.9897
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,64,1,0,2.3344
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,128,1,0,4.4302
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1024,1,0,18.2248
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,256,1,0,8.9065
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,512,1,0,17.7642
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,512,1,0,0.1639
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1024,1,0,0.2139
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,16,1,0,0.1239
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,32,1,0,0.1218
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,64,1,0,0.1257
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,128,1,0,0.1299
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1536,1,0,0.2596
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,256,1,0,0.1420
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,2048,1,0,0.3079
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,3072,1,0,0.4174
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,16,1,0,0.1239
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,32,1,0,0.1259
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,64,1,0,0.1299
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,128,1,0,0.1380
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,256,1,0,0.1665
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,4096,1,0,0.5359
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,512,1,0,0.2074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,8192,1,0,1.0463
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1024,1,0,0.2975
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1536,1,0,0.3936
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,10240,1,0,1.3430
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,2048,1,0,0.4912
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,12288,1,0,1.6743
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,3072,1,0,0.6973
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,4096,1,0,0.9202
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,16384,1,0,2.3785
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,6144,1,0,1.4230
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,8192,1,0,1.9773
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,10240,1,0,2.5757
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,12288,1,0,3.1967
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,16384,1,0,4.5745
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,16,1,0,0.1237
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,32,1,0,0.1299
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,64,1,0,0.1395
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,128,1,0,0.1634
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,256,1,0,0.2047
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,32768,1,0,6.0781
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,512,1,0,0.2896
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1024,1,0,0.4698
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,32768,1,0,11.7306
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1536,1,0,0.6539
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,2048,1,0,0.8549
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,3072,1,0,1.2923
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,4096,1,0,1.7625
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,6144,1,0,2.7680
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,8192,1,0,3.8451
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,10240,1,0,4.9978
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,12288,1,0,6.1973
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,32,1,0,0.1385
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,16,1,0,0.1289
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,64,1,0,0.1564
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,16384,1,0,8.8628
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,128,1,0,0.2013
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,256,1,0,0.2876
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,512,1,0,0.4558
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1024,1,0,0.8172
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1536,1,0,1.2237
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,2048,1,0,1.6455
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,3072,1,0,2.5237
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,4096,1,0,3.4449
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,6144,1,0,5.3930
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,32768,1,0,23.2354
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,6144,1,0,0.7833
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,8192,1,0,7.5039
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,16,1,0,0.1412
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,10240,1,0,9.7679
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,32,1,0,0.1668
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,64,1,0,0.2047
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,128,1,0,0.2873
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,256,1,0,0.4499
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,512,1,0,0.8010
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,12288,1,0,12.1447
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1024,1,0,1.5896
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1536,1,0,2.4013
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,2048,1,0,3.2334
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,16384,1,0,17.6303
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,3072,1,0,4.9476
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,16,1,0,0.1701
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,4096,1,0,6.7482
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,32,1,0,0.2117
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,64,1,0,0.2935
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,6144,1,0,10.5888
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,128,1,0,0.4513
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,256,1,0,0.7929
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,512,1,0,1.5575
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1024,1,0,3.1242
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,8192,1,0,14.9432
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1536,1,0,4.7141
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,16,1,0,0.2178
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,2048,1,0,6.3343
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,32,1,0,0.3019
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,64,1,0,0.4595
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,128,1,0,0.7914
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,3072,1,0,9.7221
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,256,1,0,1.5428
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,512,1,0,3.0739
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,4096,1,0,13.4777
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1024,1,0,6.1175
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,16,1,0,0.3201
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,32,1,0,0.4783
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1536,1,0,9.2608
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,64,1,0,0.8107
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,128,1,0,1.5484
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,256,1,0,3.0506
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,2048,1,0,12.6707
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,16,1,0,0.5180
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,512,1,0,6.0118
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,32,1,0,0.8487
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,64,1,0,1.5845
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,128,1,0,3.0476
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1024,1,0,12.2926
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,256,1,0,5.9799
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,512,1,0,12.0588
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,512,1,0,0.1524
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1024,1,0,0.1973
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,32,1,0,0.1217
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,16,1,0,0.1176
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,64,1,0,0.1219
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,128,1,0,0.1266
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1536,1,0,0.2306
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,256,1,0,0.1378
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,2048,1,0,0.2690
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,3072,1,0,0.3469
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,16,1,0,0.1247
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,32,1,0,0.1258
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,64,1,0,0.1280
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,128,1,0,0.1319
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,256,1,0,0.1506
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,512,1,0,0.1906
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1024,1,0,0.2570
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,6144,1,0,0.5951
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,8192,1,0,0.7984
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1536,1,0,0.3275
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,10240,1,0,0.9997
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,3072,1,0,0.5477
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,4096,1,0,0.7184
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,4096,1,0,0.4198
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,2048,1,0,0.3947
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,12288,1,0,1.2228
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,16384,1,0,1.6886
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,6144,1,0,1.0719
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,8192,1,0,1.4517
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,16384,1,0,3.1836
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,10240,1,0,1.8538
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,16,1,0,0.1256
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,32,1,0,0.1278
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,64,1,0,0.1323
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,32768,1,0,7.6612
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,128,1,0,0.1504
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,256,1,0,0.1800
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,32768,1,0,4.0158
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,512,1,0,0.2528
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1024,1,0,0.3830
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1536,1,0,0.5210
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,2048,1,0,0.6752
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,3072,1,0,0.9915
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,4096,1,0,1.3239
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,6144,1,0,2.0273
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,8192,1,0,2.7876
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,10240,1,0,3.5824
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,12288,1,0,4.3989
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,16,1,0,0.1288
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,32,1,0,0.1319
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,16384,1,0,6.1509
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,64,1,0,0.1483
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,256,1,0,0.2470
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,128,1,0,0.1826
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,512,1,0,0.3759
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1024,1,0,0.6479
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1536,1,0,0.9497
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,2048,1,0,1.2569
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,3072,1,0,1.9022
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,32768,1,0,14.9050
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,4096,1,0,2.5751
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,6144,1,0,3.9660
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,8192,1,0,5.4309
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,16,1,0,0.1338
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,10240,1,0,6.9594
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,32,1,0,0.1489
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,64,1,0,0.1830
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,12288,1,0,8.5586
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,12288,1,0,2.2716
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,128,1,0,0.2473
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,256,1,0,0.3716
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,512,1,0,0.6370
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1024,1,0,1.2224
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1536,1,0,1.8301
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,16384,1,0,12.0183
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,2048,1,0,2.4585
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,16,1,0,0.1525
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,3072,1,0,3.7279
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,32,1,0,0.1848
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,4096,1,0,5.0305
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,64,1,0,0.2475
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,256,1,0,0.6355
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,128,1,0,0.3736
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,6144,1,0,7.7602
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,512,1,0,1.2058
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,8192,1,0,10.6275
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1024,1,0,2.3948
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1536,1,0,3.6102
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,16,1,0,0.1910
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,2048,1,0,4.8254
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,32,1,0,0.2553
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,3072,1,0,7.2960
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,64,1,0,0.3787
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,128,1,0,0.6366
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,256,1,0,1.1996
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,4096,1,0,9.8834
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,512,1,0,2.3676
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1024,1,0,4.7113
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,16,1,0,0.2620
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,32,1,0,0.3859
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1536,1,0,7.0708
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,64,1,0,0.6441
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,128,1,0,1.1973
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,2048,1,0,9.4880
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,256,1,0,2.3518
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,512,1,0,4.6581
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,16,1,0,0.4074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,32,1,0,0.6636
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,64,1,0,1.2160
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,128,1,0,2.3561
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1024,1,0,9.2687
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,256,1,0,4.6355
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,512,1,0,9.1704
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,512,1,0,0.1507
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1024,1,0,0.1881
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,16,1,0,0.1196
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,32,1,0,0.1193
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,64,1,0,0.1256
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,128,1,0,0.1260
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,256,1,0,0.1343
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1536,1,0,0.2187
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,2048,1,0,0.2548
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,3072,1,0,0.3198
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,16,1,0,0.1183
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,32,1,0,0.1215
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,64,1,0,0.1256
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,128,1,0,0.1298
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,256,1,0,0.1455
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,512,1,0,0.1778
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1024,1,0,0.2402
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,4096,1,0,0.3867
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,6144,1,0,0.5189
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1536,1,0,0.2999
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,2048,1,0,0.3606
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,3072,1,0,0.4803
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,8192,1,0,0.6586
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,10240,1,0,0.8117
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,4096,1,0,0.6046
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,12288,1,0,0.9912
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,16384,1,0,1.3607
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,6144,1,0,0.9010
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,8192,1,0,1.2105
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,10240,1,0,1.5262
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,12288,1,0,1.8393
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,16,1,0,0.1216
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,32,1,0,0.1253
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,32768,1,0,2.9851
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,16384,1,0,2.5093
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,32768,1,0,5.6369
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,64,1,0,0.1296
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,128,1,0,0.1493
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,256,1,0,0.1707
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,512,1,0,0.2339
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1024,1,0,0.3473
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1536,1,0,0.4595
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,2048,1,0,0.5788
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,3072,1,0,0.8486
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,4096,1,0,1.1303
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,6144,1,0,1.6895
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,8192,1,0,2.2758
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,10240,1,0,2.8801
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,12288,1,0,3.5017
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,32,1,0,0.1316
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,16,1,0,0.1256
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,16384,1,0,4.8117
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,64,1,0,0.1419
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,128,1,0,0.1772
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,256,1,0,0.2303
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,512,1,0,0.3391
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1024,1,0,0.5661
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1536,1,0,0.8252
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,32768,1,0,10.8766
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,2048,1,0,1.0878
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,3072,1,0,1.6097
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,4096,1,0,2.1438
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,6144,1,0,3.2615
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,16,1,0,0.1325
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,8192,1,0,4.4032
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,32,1,0,0.1439
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,10240,1,0,5.6036
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,64,1,0,0.1712
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,128,1,0,0.2271
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,12288,1,0,6.8051
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,256,1,0,0.3371
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,512,1,0,0.5639
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,16384,1,0,9.3697
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1024,1,0,1.0662
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1536,1,0,1.5677
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,2048,1,0,2.0829
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,3072,1,0,3.1356
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,4096,1,0,4.2002
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,16,1,0,0.1443
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,32,1,0,0.1724
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,64,1,0,0.2282
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,6144,1,0,6.3694
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,128,1,0,0.3356
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,8192,1,0,8.6329
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,256,1,0,0.5562
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,512,1,0,1.0540
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1024,1,0,2.0447
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1536,1,0,3.0647
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,2048,1,0,4.0843
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,16,1,0,0.1713
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,32,1,0,0.2305
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,3072,1,0,6.1404
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,64,1,0,0.3387
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,128,1,0,0.5592
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,4096,1,0,8.2338
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,256,1,0,1.0471
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,512,1,0,2.0324
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1024,1,0,4.0174
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,32,1,0,0.3414
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,16,1,0,0.2368
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,64,1,0,0.5631
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1536,1,0,6.0251
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,128,1,0,1.0482
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,2048,1,0,8.0308
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,256,1,0,2.0234
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,16,1,0,0.3529
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,32,1,0,0.5728
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,512,1,0,3.9908
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,64,1,0,1.0581
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,128,1,0,2.0230
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1024,1,0,7.9235
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,256,1,0,3.9744
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,512,1,0,7.8846
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,512,1,0,0.1474
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1024,1,0,0.1847
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,16,1,0,0.1234
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,32,1,0,0.1155
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,64,1,0,0.1242
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1536,1,0,0.2171
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,128,1,0,0.1257
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,256,1,0,0.1327
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,2048,1,0,0.2460
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,3072,1,0,0.3084
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,16,1,0,0.1154
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,32,1,0,0.1200
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,64,1,0,0.1256
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,128,1,0,0.1305
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,256,1,0,0.1439
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,512,1,0,0.1769
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,4096,1,0,0.3706
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1024,1,0,0.2347
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,8192,1,0,0.6227
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,6144,1,0,0.4911
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1536,1,0,0.2872
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,10240,1,0,0.7581
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,12288,1,0,0.8992
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,2048,1,0,0.3450
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,16384,1,0,1.1731
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,3072,1,0,0.4549
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,4096,1,0,0.5743
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,6144,1,0,0.8209
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,8192,1,0,1.0709
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,10240,1,0,1.3293
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,12288,1,0,1.6042
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,16384,1,0,2.1853
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,32768,1,0,2.4854
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,16,1,0,0.1196
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,32,1,0,0.1266
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,64,1,0,0.1320
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,128,1,0,0.1421
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,32768,1,0,4.6245
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,256,1,0,0.1678
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,512,1,0,0.2274
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1024,1,0,0.3318
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1536,1,0,0.4350
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,2048,1,0,0.5458
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,3072,1,0,0.7847
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,4096,1,0,1.0192
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,6144,1,0,1.5130
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,8192,1,0,2.0324
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,10240,1,0,2.5518
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,12288,1,0,3.0738
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,16,1,0,0.1256
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,16384,1,0,4.1373
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,64,1,0,0.1428
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,128,1,0,0.1706
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,32,1,0,0.1298
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,256,1,0,0.2252
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,32768,1,0,8.8528
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,512,1,0,0.3249
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1024,1,0,0.5326
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1536,1,0,0.7623
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,2048,1,0,0.9943
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,3072,1,0,1.4629
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,4096,1,0,1.9544
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,6144,1,0,2.9180
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,16,1,0,0.1296
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,8192,1,0,3.8996
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,10240,1,0,4.9167
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,12288,1,0,5.9243
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,64,1,0,0.1685
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,128,1,0,0.2227
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,256,1,0,0.3221
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,512,1,0,0.5259
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,32,1,0,0.1396
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,16384,1,0,8.0292
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1024,1,0,0.9793
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1536,1,0,1.4377
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,2048,1,0,1.9129
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,3072,1,0,2.8380
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,4096,1,0,3.7738
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,16,1,0,0.1417
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,32,1,0,0.1723
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,6144,1,0,5.6814
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,64,1,0,0.2214
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,128,1,0,0.3200
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,256,1,0,0.5239
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,512,1,0,0.9737
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,8192,1,0,7.6325
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1024,1,0,1.8877
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1536,1,0,2.7953
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,2048,1,0,3.7134
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,16,1,0,0.1724
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,32,1,0,0.2214
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,64,1,0,0.3197
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,3072,1,0,5.5460
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,128,1,0,0.5215
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,256,1,0,0.9718
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,4096,1,0,7.4063
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,512,1,0,1.8765
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1024,1,0,3.6778
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,32,1,0,0.3216
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1536,1,0,5.4795
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,16,1,0,0.2255
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,64,1,0,0.5245
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,2048,1,0,7.2927
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,128,1,0,0.9728
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,256,1,0,1.8701
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,16,1,0,0.3285
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,512,1,0,3.6572
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,32,1,0,0.5293
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,64,1,0,0.9779
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,128,1,0,1.8767
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1024,1,0,7.2460
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,256,1,0,3.6502
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,512,1,0,7.2032
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,512,1,0,0.1421
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1024,1,0,0.1728
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,32,1,0,0.1205
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,64,1,0,0.1155
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1536,1,0,0.2128
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,256,1,0,0.1277
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,128,1,0,0.1196
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,16,1,0,0.1154
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,2048,1,0,0.2423
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,3072,1,0,0.3024
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,16,1,0,0.1156
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,32,1,0,0.1175
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,64,1,0,0.1221
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,128,1,0,0.1276
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,256,1,0,0.1406
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,512,1,0,0.1678
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1024,1,0,0.2300
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1536,1,0,0.2834
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,4096,1,0,0.3617
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,2048,1,0,0.3363
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,6144,1,0,0.4796
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,8192,1,0,0.6030
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,10240,1,0,0.7355
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,12288,1,0,0.8720
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,3072,1,0,0.4401
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,16384,1,0,1.1311
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,4096,1,0,0.5539
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,6144,1,0,0.7908
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,32768,1,0,2.1866
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,8192,1,0,1.0280
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,10240,1,0,1.2697
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,12288,1,0,1.5029
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,16384,1,0,1.9811
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,16,1,0,0.1224
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,32,1,0,0.1216
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,32768,1,0,4.0888
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,64,1,0,0.1261
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,256,1,0,0.1711
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,128,1,0,0.1400
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,512,1,0,0.2246
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1024,1,0,0.3238
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1536,1,0,0.4248
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,2048,1,0,0.5276
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,3072,1,0,0.7540
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,4096,1,0,0.9762
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,6144,1,0,1.4247
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,8192,1,0,1.8817
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,10240,1,0,2.3459
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,12288,1,0,2.8211
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,16,1,0,0.1218
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,16384,1,0,3.7828
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,32,1,0,0.1256
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,64,1,0,0.1391
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,128,1,0,0.1631
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,256,1,0,0.2220
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,512,1,0,0.3180
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,32768,1,0,7.7980
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1024,1,0,0.5187
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1536,1,0,0.7354
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,2048,1,0,0.9512
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,3072,1,0,1.3862
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,4096,1,0,1.8284
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,6144,1,0,2.7283
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,8192,1,0,3.6288
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,16,1,0,0.1257
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,10240,1,0,4.5509
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,32,1,0,0.1378
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,12288,1,0,5.4563
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,64,1,0,0.1678
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,128,1,0,0.2179
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,16384,1,0,7.3099
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,256,1,0,0.3129
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,512,1,0,0.5083
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1024,1,0,0.9386
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1536,1,0,1.3665
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,2048,1,0,1.8048
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,3072,1,0,2.6836
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,4096,1,0,3.5575
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,16,1,0,0.1380
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,32,1,0,0.1702
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,64,1,0,0.2203
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,6144,1,0,5.2998
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,8192,1,0,7.0640
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,128,1,0,0.3118
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,256,1,0,0.5053
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,512,1,0,0.9325
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1024,1,0,1.7904
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1536,1,0,2.6551
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,2048,1,0,3.5143
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,16,1,0,0.1657
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,32,1,0,0.2179
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,3072,1,0,5.2194
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,64,1,0,0.3115
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,4096,1,0,6.9390
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,128,1,0,0.5028
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,256,1,0,0.9294
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,512,1,0,1.7826
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1024,1,0,3.4919
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,16,1,0,0.2187
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,32,1,0,0.3104
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1536,1,0,5.1796
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,64,1,0,0.5050
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,128,1,0,0.9261
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,2048,1,0,6.8898
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,256,1,0,1.7795
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,512,1,0,3.4762
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,16,1,0,0.3131
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,32,1,0,0.5090
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1024,1,0,6.8369
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,64,1,0,0.9288
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,128,1,0,1.7838
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,16,1,0,0.1683
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,256,1,0,3.4750
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,32,1,0,0.1715
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,512,1,0,6.8257
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,128,1,0,0.2087
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,256,1,0,0.2578
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,512,1,0,0.3396
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,1024,1,0,0.5603
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,64,1,0,0.1876
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,1536,1,0,0.7902
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,2048,1,0,1.0425
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,3072,1,0,1.6061
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,4096,1,0,2.1522
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,6144,1,0,3.3909
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,8192,1,0,4.7523
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,10240,1,0,6.3014
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,16,1,0,0.1793
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,32,1,0,0.1866
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,12288,1,0,7.9619
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,64,1,0,0.2097
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,128,1,0,0.2571
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,16384,1,0,11.9680
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,256,1,0,0.3349
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,512,1,0,0.5451
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,1024,1,0,0.9952
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,1536,1,0,1.5056
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,2048,1,0,1.9826
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,3072,1,0,3.0419
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,4096,1,0,4.1461
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,6144,1,0,6.5916
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,8192,1,0,9.5426
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,32768,1,0,33.3607
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,16,1,0,0.1938
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,10240,1,0,12.6236
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,32,1,0,0.2161
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,64,1,0,0.2655
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,128,1,0,0.3360
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,12288,1,0,15.7243
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,256,1,0,0.5388
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,512,1,0,0.9740
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,1024,1,0,1.8980
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,16384,1,0,23.6876
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,1536,1,0,2.8541
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,2048,1,0,3.8178
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,3072,1,0,5.9093
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,4096,1,0,8.3456
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,6144,1,0,13.0260
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,16,1,0,0.2250
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,8192,1,0,18.9114
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,32,1,0,0.2725
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,10240,1,0,25.0500
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,64,1,0,0.3458
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,128,1,0,0.5439
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,256,1,0,0.9584
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,12288,1,0,31.8826
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,32768,1,0,66.2760
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,512,1,0,1.8523
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,1024,1,0,3.6535
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,1536,1,0,5.5496
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,2048,1,0,7.7104
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,16384,1,0,47.1195
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,3072,1,0,11.6809
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,4096,1,0,16.5704
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,16,1,0,0.2915
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,32,1,0,0.3615
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,6144,1,0,26.4725
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,64,1,0,0.5616
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,128,1,0,0.9693
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,256,1,0,1.8286
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,8192,1,0,37.6015
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,512,1,0,3.5682
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,1024,1,0,7.3977
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,10240,1,0,49.8896
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,1536,1,0,10.9624
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,2048,1,0,15.3008
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,12288,1,0,61.1828
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,3072,1,0,23.8213
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,16,1,0,0.4009
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,32,1,0,0.5909
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,64,1,0,1.0073
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,128,1,0,1.8526
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,4096,1,0,32.9616
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,32768,1,0,129.2597
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,256,1,0,3.5220
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,512,1,0,7.2277
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,16384,1,0,91.0095
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,1024,1,0,14.6629
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,6144,1,0,50.5119
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,16,1,0,0.6718
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,1536,1,0,22.4441
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,32,1,0,1.0702
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,64,1,0,1.9256
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,128,1,0,3.5680
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,2048,1,0,30.4395
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,8192,1,0,72.0247
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,256,1,0,7.1423
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,3072,1,0,45.2096
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,512,1,0,14.3180
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,128,16,1,0,1.2269
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,128,32,1,0,2.0560
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,128,64,1,0,3.7180
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,128,128,1,0,7.2341
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,1024,1,0,29.1534
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,128,256,1,0,14.1368
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,4096,1,0,62.7332
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,256,16,1,0,2.3626
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,1536,1,0,42.4354
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,256,32,1,0,3.9741
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,256,64,1,0,7.5362
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,128,512,1,0,28.4987
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,2048,1,0,57.7527
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,16,1,0,0.1505
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,32,1,0,0.1544
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,64,1,0,0.1633
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,256,128,1,0,14.3345
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,128,1,0,0.1735
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,256,1,0,0.2034
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,512,1,0,0.2556
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,1024,1,0,0.3786
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,1536,1,0,0.5143
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,2048,1,0,0.6558
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,3072,1,0,0.9651
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,4096,1,0,1.2850
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,6144,1,0,2.0159
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,256,256,1,0,28.1407
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,8192,1,0,2.7980
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,128,1024,1,0,55.1891
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,10240,1,0,3.6560
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,12288,1,0,4.5878
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,16384,1,0,6.6837
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,16,1,0,0.1525
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,32,1,0,0.1609
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,64,1,0,0.1705
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,128,1,0,0.2013
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,256,1,0,0.2493
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,512,1,0,0.3701
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,1024,1,0,0.6293
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,32768,1,0,18.2012
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,1536,1,0,0.9125
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,2048,1,0,1.1961
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,3072,1,0,1.8288
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,4096,1,0,2.4742
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,6144,1,0,3.8763
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,256,512,1,0,53.8479
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,8192,1,0,5.4370
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,10240,1,0,7.1421
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,16,1,0,0.1622
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,12288,1,0,8.9855
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,32,1,0,0.1748
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,64,1,0,0.2040
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,128,1,0,0.2535
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,256,1,0,0.3626
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,16384,1,0,13.3197
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,512,1,0,0.6161
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,1024,1,0,1.1466
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,1536,1,0,1.7280
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,2048,1,0,2.3044
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,3072,1,0,3.5233
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,4096,1,0,4.8288
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,6144,1,0,7.6009
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,8192,1,0,10.9080
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,32768,1,0,36.0197
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,16,1,0,0.1797
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,32,1,0,0.2111
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,10240,1,0,14.3385
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,64,1,0,0.2556
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,128,1,0,0.3676
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,12288,1,0,17.7298
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,256,1,0,0.6120
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,512,1,0,1.1247
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,1024,1,0,2.2210
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,1536,1,0,3.3438
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,2048,1,0,4.4993
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,16384,1,0,26.3461
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,3072,1,0,6.9243
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,4096,1,0,9.6907
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,16,1,0,0.2163
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,6144,1,0,15.0496
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,32,1,0,0.2649
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,64,1,0,0.3726
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,128,1,0,0.6095
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,8192,1,0,21.6042
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,256,1,0,1.1106
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,512,1,0,2.1724
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,1024,1,0,4.3340
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,10240,1,0,28.4193
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,1536,1,0,6.5672
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,12288,1,0,35.8144
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,2048,1,0,9.0809
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,32768,1,0,71.5803
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,16,1,0,0.2841
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,3072,1,0,13.6825
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,32,1,0,0.3898
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,64,1,0,0.6316
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,4096,1,0,19.2210
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,16384,1,0,52.4069
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,128,1,0,1.1204
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,256,1,0,2.1510
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,512,1,0,4.2518
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,6144,1,0,30.4901
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,1024,1,0,8.7477
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,16,1,0,0.4276
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,1536,1,0,12.9783
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,32,1,0,0.6622
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,64,1,0,1.1568
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,2048,1,0,17.9546
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,8192,1,0,42.9474
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,128,1,0,2.1762
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,256,1,0,4.2078
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,3072,1,0,27.7629
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,512,1,0,8.5859
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,128,16,1,0,0.7392
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,128,32,1,0,1.2229
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,1024,1,0,17.3166
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,128,64,1,0,2.2476
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,4096,1,0,38.2260
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,128,128,1,0,4.2527
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,128,256,1,0,8.5034
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,256,16,1,0,1.3753
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,1536,1,0,26.3849
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,256,32,1,0,2.3779
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,128,512,1,0,17.0027
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,256,64,1,0,4.3941
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,2048,1,0,35.7732
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,256,128,1,0,8.5909
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,16,1,0,0.1306
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,32,1,0,0.1378
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,64,1,0,0.1377
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,128,1,0,0.1469
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,256,1,0,0.1722
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,512,1,0,0.2032
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,1024,1,0,0.2750
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,1536,1,0,0.3523
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,256,256,1,0,16.8112
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,2048,1,0,0.4379
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,3072,1,0,0.6105
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,128,1024,1,0,34.4348
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,4096,1,0,0.7899
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,6144,1,0,1.1952
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,8192,1,0,1.6429
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,10240,1,0,2.1375
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,12288,1,0,2.6756
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,16384,1,0,3.8418
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,16,1,0,0.1377
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,32,1,0,0.1380
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,64,1,0,0.1467
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,256,512,1,0,33.8040
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,128,1,0,0.1675
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,256,1,0,0.2012
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,512,1,0,0.2693
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,32768,1,0,9.9710
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,1024,1,0,0.4207
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,1536,1,0,0.5765
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,2048,1,0,0.7382
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,3072,1,0,1.0914
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,4096,1,0,1.4681
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,6144,1,0,2.2987
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,8192,1,0,3.1951
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,10240,1,0,4.1476
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,12288,1,0,5.1827
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,16,1,0,0.1383
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,32,1,0,0.1462
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,16384,1,0,7.4803
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,64,1,0,0.1637
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,128,1,0,0.2001
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,256,1,0,0.2634
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,512,1,0,0.4108
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,1024,1,0,0.7098
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,1536,1,0,1.0353
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,2048,1,0,1.3756
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,3072,1,0,2.1091
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,4096,1,0,2.8720
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,6144,1,0,4.4759
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,32768,1,0,19.7601
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,8192,1,0,6.2298
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,10240,1,0,8.1173
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,16,1,0,0.1464
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,32,1,0,0.1704
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,12288,1,0,10.1409
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,64,1,0,0.2009
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,128,1,0,0.2654
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,256,1,0,0.4058
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,16384,1,0,14.8878
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,512,1,0,0.6953
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,1024,1,0,1.3282
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,1536,1,0,2.0148
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,2048,1,0,2.7028
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,3072,1,0,4.1110
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,4096,1,0,5.6184
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,6144,1,0,8.7781
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,16,1,0,0.1732
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,32,1,0,0.2079
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,8192,1,0,12.4592
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,64,1,0,0.2718
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,128,1,0,0.4092
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,256,1,0,0.6894
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,10240,1,0,16.2883
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,32768,1,0,39.0690
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,512,1,0,1.3036
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,12288,1,0,20.0705
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,1024,1,0,2.6166
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,1536,1,0,3.9281
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,2048,1,0,5.2883
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,16,1,0,0.2165
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,3072,1,0,8.1136
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,16384,1,0,29.4636
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,32,1,0,0.2807
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,64,1,0,0.4185
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,4096,1,0,11.2748
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,128,1,0,0.6912
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,256,1,0,1.2910
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,512,1,0,2.5730
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,6144,1,0,17.3381
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,1024,1,0,5.1316
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,1536,1,0,7.7454
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,8192,1,0,24.7043
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,16,1,0,0.2972
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,2048,1,0,10.6059
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,32,1,0,0.4338
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,64,1,0,0.7096
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,128,1,0,1.3011
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,3072,1,0,16.0442
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,256,1,0,2.5458
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,512,1,0,5.0353
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,4096,1,0,22.2913
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,128,16,1,0,0.4714
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,1024,1,0,10.3099
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,128,32,1,0,0.7432
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,128,64,1,0,1.3393
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,128,128,1,0,2.5722
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,1536,1,0,15.2839
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,256,16,1,0,0.8218
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,128,256,1,0,5.0008
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,2048,1,0,21.0809
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,256,32,1,0,1.4021
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,256,64,1,0,2.6475
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,128,512,1,0,10.1128
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,256,128,1,0,5.0361
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,32,1,0,0.1287
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,16,1,0,0.1258
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,64,1,0,0.1341
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,128,1,0,0.1360
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,256,1,0,0.1464
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,128,1024,1,0,20.4038
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,512,1,0,0.1692
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,256,256,1,0,10.0529
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,1024,1,0,0.2277
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,1536,1,0,0.2753
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,2048,1,0,0.3258
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,3072,1,0,0.4387
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,4096,1,0,0.5538
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,6144,1,0,0.8017
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,8192,1,0,1.0727
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,10240,1,0,1.3683
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,12288,1,0,1.6982
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,256,512,1,0,20.1295
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,16384,1,0,2.3962
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,16,1,0,0.1298
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,32,1,0,0.1321
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,32768,1,0,5.9249
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,128,1,0,0.1471
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,256,1,0,0.1747
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,512,1,0,0.2204
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,64,1,0,0.1378
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,1024,1,0,0.3145
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,1536,1,0,0.4166
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,2048,1,0,0.5193
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,3072,1,0,0.7379
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,4096,1,0,0.9683
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,6144,1,0,1.4843
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,8192,1,0,2.0445
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,16,1,0,0.1317
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,10240,1,0,2.6359
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,12288,1,0,3.2724
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,32,1,0,0.1362
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,64,1,0,0.1453
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,16384,1,0,4.6364
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,128,1,0,0.1661
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,256,1,0,0.2180
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,512,1,0,0.3109
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,1024,1,0,0.5023
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,1536,1,0,0.7048
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,32768,1,0,11.5088
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,2048,1,0,0.9159
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,3072,1,0,1.3848
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,4096,1,0,1.8688
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,6144,1,0,2.8945
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,8192,1,0,3.9835
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,10240,1,0,5.1312
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,16,1,0,0.1345
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,32,1,0,0.1447
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,12288,1,0,6.3576
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,64,1,0,0.1679
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,128,1,0,0.2166
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,256,1,0,0.3077
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,16384,1,0,9.0154
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,512,1,0,0.4941
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,1024,1,0,0.8877
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,1536,1,0,1.3263
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,2048,1,0,1.7749
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,3072,1,0,2.7051
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,4096,1,0,3.6610
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,6144,1,0,5.6572
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,32768,1,0,22.8368
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,8192,1,0,7.7707
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,16,1,0,0.1494
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,10240,1,0,10.0486
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,32,1,0,0.1748
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,64,1,0,0.2189
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,128,1,0,0.3087
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,12288,1,0,12.4554
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,256,1,0,0.4876
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,512,1,0,0.8742
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,1024,1,0,1.7305
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,16384,1,0,17.9820
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,1536,1,0,2.6036
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,2048,1,0,3.4948
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,3072,1,0,5.2894
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,16,1,0,0.1752
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,32,1,0,0.2242
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,64,1,0,0.3161
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,4096,1,0,7.1709
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,128,1,0,0.4901
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,256,1,0,0.8690
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,512,1,0,1.7077
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,1024,1,0,3.4024
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,6144,1,0,11.1116
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,8192,1,0,15.5184
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,1536,1,0,5.1053
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,2048,1,0,6.8470
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,16,1,0,0.2317
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,32,1,0,0.3230
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,64,1,0,0.4986
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,128,1,0,0.8726
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,3072,1,0,10.4168
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,256,1,0,1.6931
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,4096,1,0,14.3402
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,512,1,0,3.3592
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,128,16,1,0,0.3410
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,1024,1,0,6.6719
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,128,32,1,0,0.5157
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,128,64,1,0,0.8886
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,1536,1,0,10.0466
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,128,128,1,0,1.7040
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,128,256,1,0,3.3391
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,2048,1,0,13.7218
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,256,16,1,0,0.5538
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,128,512,1,0,6.5913
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,256,32,1,0,0.9222
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,256,64,1,0,1.7413
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,256,128,1,0,3.3605
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,16,1,0,0.1235
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,256,256,1,0,6.5409
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,128,1024,1,0,13.3911
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,64,1,0,0.1319
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,32,1,0,0.1277
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,128,1,0,0.1337
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,256,1,0,0.1420
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,512,1,0,0.1616
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,1024,1,0,0.2031
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,1536,1,0,0.2346
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,2048,1,0,0.2800
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,3072,1,0,0.3582
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,4096,1,0,0.4346
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,6144,1,0,0.6091
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,256,512,1,0,13.2374
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,8192,1,0,0.8054
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,10240,1,0,1.0012
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,12288,1,0,1.2206
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,16384,1,0,1.6828
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,16,1,0,0.1302
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,32,1,0,0.1297
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,64,1,0,0.1342
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,128,1,0,0.1420
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,32768,1,0,3.9091
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,256,1,0,0.1562
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,512,1,0,0.1985
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,1024,1,0,0.2690
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,1536,1,0,0.3428
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,2048,1,0,0.4117
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,3072,1,0,0.5681
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,4096,1,0,0.7381
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,6144,1,0,1.0951
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,8192,1,0,1.4789
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,10240,1,0,1.8802
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,12288,1,0,2.3029
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,16384,1,0,3.2116
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,16,1,0,0.1297
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,32,1,0,0.1340
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,64,1,0,0.1413
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,128,1,0,0.1589
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,32768,1,0,7.5152
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,256,1,0,0.1935
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,512,1,0,0.2632
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,1024,1,0,0.4015
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,1536,1,0,0.5471
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,2048,1,0,0.7051
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,3072,1,0,1.0328
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,4096,1,0,1.3747
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,6144,1,0,2.0907
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,8192,1,0,2.8501
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,10240,1,0,3.6511
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,12288,1,0,4.4752
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,16,1,0,0.1358
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,16384,1,0,6.2225
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,32,1,0,0.1422
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,64,1,0,0.1535
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,128,1,0,0.1913
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,256,1,0,0.2634
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,512,1,0,0.3964
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,1024,1,0,0.6871
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,1536,1,0,0.9985
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,32768,1,0,14.6606
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,2048,1,0,1.3232
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,3072,1,0,1.9833
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,4096,1,0,2.6782
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,6144,1,0,4.0937
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,8192,1,0,5.5696
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,16,1,0,0.1400
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,32,1,0,0.1587
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,10240,1,0,7.1235
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,64,1,0,0.1919
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,12288,1,0,8.7307
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,128,1,0,0.2608
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,256,1,0,0.3925
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,512,1,0,0.6772
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,1024,1,0,1.2937
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,16384,1,0,12.1867
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,1536,1,0,1.9324
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,2048,1,0,2.5860
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,3072,1,0,3.9094
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,16,1,0,0.1660
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,4096,1,0,5.2496
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,32,1,0,0.1943
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,64,1,0,0.2619
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,6144,1,0,8.0064
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,128,1,0,0.3963
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,256,1,0,0.6727
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,8192,1,0,10.9219
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,512,1,0,1.2821
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,1024,1,0,2.5379
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,1536,1,0,3.8093
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,16,1,0,0.1999
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,2048,1,0,5.0850
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,32,1,0,0.2659
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,64,1,0,0.4015
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,3072,1,0,7.6581
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,128,1,0,0.6758
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,256,1,0,1.2739
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,4096,1,0,10.3020
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,512,1,0,2.5197
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,1024,1,0,4.9935
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,128,16,1,0,0.2737
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,128,32,1,0,0.4075
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,128,64,1,0,0.6835
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,1536,1,0,7.4753
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,128,128,1,0,1.2776
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,2048,1,0,10.0034
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,128,256,1,0,2.5033
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,256,16,1,0,0.4273
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,128,512,1,0,4.9507
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,256,32,1,0,0.7019
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,256,64,1,0,1.2967
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,256,128,1,0,2.5108
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,128,1024,1,0,9.8205
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,16,1,0,0.1238
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,32,1,0,0.1299
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,256,256,1,0,4.9295
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,64,1,0,0.1279
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,128,1,0,0.1322
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,256,1,0,0.1398
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,512,1,0,0.1568
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,1024,1,0,0.1937
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,1536,1,0,0.2247
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,256,512,1,0,9.7509
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,2048,1,0,0.2588
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,3072,1,0,0.3248
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,4096,1,0,0.3901
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,6144,1,0,0.5240
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,8192,1,0,0.6632
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,10240,1,0,0.8162
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,12288,1,0,0.9907
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,16384,1,0,1.3439
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,16,1,0,0.1260
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,32,1,0,0.1319
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,64,1,0,0.1357
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,32768,1,0,2.9106
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,128,1,0,0.1380
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,256,1,0,0.1559
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,512,1,0,0.1891
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,1024,1,0,0.2489
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,1536,1,0,0.3080
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,2048,1,0,0.3692
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,3072,1,0,0.4971
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,4096,1,0,0.6192
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,6144,1,0,0.9137
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,8192,1,0,1.2179
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,10240,1,0,1.5280
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,12288,1,0,1.8417
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,16384,1,0,2.5034
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,16,1,0,0.1338
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,32,1,0,0.1323
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,64,1,0,0.1396
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,128,1,0,0.1524
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,32768,1,0,5.5354
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,512,1,0,0.2441
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,256,1,0,0.1827
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,1024,1,0,0.3585
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,1536,1,0,0.4728
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,2048,1,0,0.5964
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,3072,1,0,0.8717
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,4096,1,0,1.1504
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,6144,1,0,1.7144
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,8192,1,0,2.3021
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,10240,1,0,2.9058
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,16,1,0,0.1339
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,12288,1,0,3.5328
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,32,1,0,0.1367
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,64,1,0,0.1501
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,16384,1,0,4.8270
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,128,1,0,0.1849
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,256,1,0,0.2396
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,512,1,0,0.3544
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,1024,1,0,0.5870
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,1536,1,0,0.8518
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,2048,1,0,1.1182
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,32768,1,0,10.7337
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,3072,1,0,1.6493
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,4096,1,0,2.1947
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,6144,1,0,3.3251
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,8192,1,0,4.4844
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,16,1,0,0.1381
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,10240,1,0,5.6717
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,32,1,0,0.1480
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,64,1,0,0.1811
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,12288,1,0,6.8831
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,128,1,0,0.2375
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,256,1,0,0.3520
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,512,1,0,0.5824
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,1024,1,0,1.0988
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,16384,1,0,9.4361
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,1536,1,0,1.6180
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,2048,1,0,2.1502
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,3072,1,0,3.2187
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,4096,1,0,4.3029
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,16,1,0,0.1526
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,32,1,0,0.1828
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,64,1,0,0.2364
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,6144,1,0,6.5060
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,128,1,0,0.3492
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,256,1,0,0.5789
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,512,1,0,1.0929
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,8192,1,0,8.7703
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,1024,1,0,2.1164
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,1536,1,0,3.1655
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,2048,1,0,4.2105
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,16,1,0,0.1860
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,32,1,0,0.2393
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,64,1,0,0.3497
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,128,1,0,0.5859
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,3072,1,0,6.3148
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,256,1,0,1.0872
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,512,1,0,2.1030
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,4096,1,0,8.4610
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,1024,1,0,4.1705
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,128,16,1,0,0.2466
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,128,32,1,0,0.3571
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,1536,1,0,6.2253
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,128,64,1,0,0.5863
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,128,128,1,0,1.0910
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,128,256,1,0,2.0936
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,128,512,1,0,4.1393
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,2048,1,0,8.2814
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,256,16,1,0,0.3634
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,256,32,1,0,0.5931
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,256,64,1,0,1.0985
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,128,1024,1,0,8.1980
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,256,128,1,0,2.1009
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,16,1,0,0.1177
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,256,256,1,0,4.1235
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,32,1,0,0.1295
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,128,1,0,0.1321
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,256,1,0,0.1423
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,64,1,0,0.1259
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,512,1,0,0.1543
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,1024,1,0,0.1887
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,256,512,1,0,8.1641
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,1536,1,0,0.2202
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,2048,1,0,0.2520
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,3072,1,0,0.3098
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,4096,1,0,0.3711
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,6144,1,0,0.4962
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,8192,1,0,0.6249
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,10240,1,0,0.7527
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,12288,1,0,0.8913
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,16384,1,0,1.1643
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,32768,1,0,2.4178
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,64,1,0,0.1318
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,16,1,0,0.1239
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,32,1,0,0.1317
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,128,1,0,0.1362
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,256,1,0,0.1556
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,512,1,0,0.1801
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,1024,1,0,0.2400
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,1536,1,0,0.2949
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,2048,1,0,0.3490
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,3072,1,0,0.4602
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,4096,1,0,0.5825
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,6144,1,0,0.8280
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,8192,1,0,1.0759
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,10240,1,0,1.3398
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,12288,1,0,1.6071
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,16384,1,0,2.1644
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,16,1,0,0.1276
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,32,1,0,0.1346
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,32768,1,0,4.5344
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,64,1,0,0.1361
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,128,1,0,0.1471
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,256,1,0,0.1773
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,512,1,0,0.2363
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,1024,1,0,0.3396
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,1536,1,0,0.4476
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,2048,1,0,0.5556
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,3072,1,0,0.7946
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,4096,1,0,1.0333
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,6144,1,0,1.5281
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,8192,1,0,2.0369
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,10240,1,0,2.5569
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,12288,1,0,3.0691
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,16,1,0,0.1338
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,32,1,0,0.1380
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,16384,1,0,4.1346
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,64,1,0,0.1498
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,128,1,0,0.1799
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,256,1,0,0.2342
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,512,1,0,0.3348
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,32768,1,0,8.7377
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,1024,1,0,0.5446
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,1536,1,0,0.7804
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,2048,1,0,1.0130
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,3072,1,0,1.4856
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,4096,1,0,1.9753
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,6144,1,0,2.9422
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,8192,1,0,3.9251
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,16,1,0,0.1361
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,32,1,0,0.1464
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,10240,1,0,4.9380
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,64,1,0,0.1817
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,12288,1,0,5.9536
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,128,1,0,0.2308
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,16384,1,0,8.0373
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,256,1,0,0.3307
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,512,1,0,0.5417
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,1024,1,0,0.9999
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,1536,1,0,1.4661
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,2048,1,0,1.9377
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,3072,1,0,2.8797
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,4096,1,0,3.8296
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,16,1,0,0.1501
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,32,1,0,0.1755
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,64,1,0,0.2315
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,6144,1,0,5.7398
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,128,1,0,0.3293
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,256,1,0,0.5374
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,8192,1,0,7.6834
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,512,1,0,0.9958
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,1024,1,0,1.9201
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,1536,1,0,2.8485
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,2048,1,0,3.7718
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,16,1,0,0.1768
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,32,1,0,0.2282
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,3072,1,0,5.6444
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,64,1,0,0.3282
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,128,1,0,0.5348
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,256,1,0,0.9934
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,4096,1,0,7.5091
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,512,1,0,1.9107
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,1024,1,0,3.7483
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,128,16,1,0,0.2341
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,128,32,1,0,0.3316
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,128,64,1,0,0.5430
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,1536,1,0,5.5785
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,128,256,1,0,1.9074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,2048,1,0,7.4229
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,128,128,1,0,0.9938
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,128,512,1,0,3.7349
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,256,16,1,0,0.3367
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,256,32,1,0,0.5474
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,256,64,1,0,0.9997
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,128,1024,1,0,7.3776
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,256,128,1,0,1.9131
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,16,1,0,0.1154
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,256,256,1,0,3.7220
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,32,1,0,0.1175
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,64,1,0,0.1203
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,128,1,0,0.1318
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,256,1,0,0.1358
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,256,512,1,0,7.3534
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,512,1,0,0.1504
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,1024,1,0,0.1851
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,1536,1,0,0.2152
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,2048,1,0,0.2462
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,3072,1,0,0.3055
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,4096,1,0,0.3621
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,6144,1,0,0.4753
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,8192,1,0,0.5987
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,10240,1,0,0.7263
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,12288,1,0,0.8581
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,16384,1,0,1.1122
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,16,1,0,0.1226
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,32768,1,0,2.1462
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,32,1,0,0.1196
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,128,1,0,0.1320
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,64,1,0,0.1275
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,256,1,0,0.1484
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,512,1,0,0.1825
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,1024,1,0,0.2368
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,1536,1,0,0.2890
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,2048,1,0,0.3406
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,3072,1,0,0.4413
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,4096,1,0,0.5544
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,6144,1,0,0.7906
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,8192,1,0,1.0265
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,10240,1,0,1.2666
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,12288,1,0,1.4954
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,16384,1,0,1.9741
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,16,1,0,0.1205
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,32,1,0,0.1264
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,64,1,0,0.1344
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,32768,1,0,4.0142
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,128,1,0,0.1460
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,256,1,0,0.1760
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,512,1,0,0.2316
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,1024,1,0,0.3293
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,1536,1,0,0.4299
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,2048,1,0,0.5379
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,3072,1,0,0.7610
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,4096,1,0,0.9826
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,6144,1,0,1.4303
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,8192,1,0,1.8865
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,10240,1,0,2.3511
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,12288,1,0,2.8195
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,16384,1,0,3.7695
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,16,1,0,0.1317
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,32,1,0,0.1359
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,64,1,0,0.1441
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,32768,1,0,7.7068
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,128,1,0,0.1771
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,256,1,0,0.2272
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,512,1,0,0.3242
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,1024,1,0,0.5224
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,1536,1,0,0.7461
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,2048,1,0,0.9626
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,3072,1,0,1.4009
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,4096,1,0,1.8420
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,6144,1,0,2.7457
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,8192,1,0,3.6386
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,16,1,0,0.1318
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,10240,1,0,4.5445
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,32,1,0,0.1462
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,12288,1,0,5.4506
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,64,1,0,0.1738
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,16384,1,0,7.3070
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,128,1,0,0.2248
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,256,1,0,0.3209
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,512,1,0,0.5200
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,1024,1,0,0.9512
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,1536,1,0,1.3845
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,2048,1,0,1.8190
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,16,1,0,0.1444
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,3072,1,0,2.7029
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,4096,1,0,3.5720
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,32,1,0,0.1690
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,6144,1,0,5.3203
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,64,1,0,0.2245
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,128,1,0,0.3181
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,8192,1,0,7.0896
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,256,1,0,0.5144
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,512,1,0,0.9443
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,1024,1,0,1.8116
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,1536,1,0,2.6793
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,16,1,0,0.1708
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,2048,1,0,3.5400
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,32,1,0,0.2261
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,3072,1,0,5.2672
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,64,1,0,0.3182
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,128,1,0,0.5127
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,256,1,0,0.9434
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,4096,1,0,6.9888
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,512,1,0,1.8045
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,1024,1,0,3.5203
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,128,16,1,0,0.2254
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,1536,1,0,5.2272
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,128,32,1,0,0.3197
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,128,64,1,0,0.5134
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,128,128,1,0,0.9427
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,2048,1,0,6.9445
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,128,256,1,0,1.8021
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,256,16,1,0,0.3223
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,128,512,1,0,3.5113
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,256,32,1,0,0.5176
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,128,1024,1,0,6.9133
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,256,64,1,0,0.9448
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,256,128,1,0,1.8020
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,256,256,1,0,3.5123
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,16,1,0,0.1333
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,32,1,0,0.1348
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,256,512,1,0,6.9029
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,64,1,0,0.1423
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,128,1,0,0.1521
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,256,1,0,0.1729
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,512,1,0,0.2426
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1536,1,0,0.5391
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1024,1,0,0.3854
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,2048,1,0,0.7202
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,3072,1,0,1.1376
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,4096,1,0,1.5985
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,6144,1,0,2.6151
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,8192,1,0,3.8104
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,10240,1,0,5.1647
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,12288,1,0,6.6698
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,16,1,0,0.1421
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,32,1,0,0.1449
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,64,1,0,0.1566
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,128,1,0,0.1747
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,16384,1,0,10.3674
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,256,1,0,0.2327
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,512,1,0,0.3684
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1024,1,0,0.6589
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1536,1,0,1.0141
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,2048,1,0,1.3844
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,3072,1,0,2.1653
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,4096,1,0,3.0420
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,6144,1,0,5.0726
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,8192,1,0,7.6816
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,10240,1,0,10.4372
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,16,1,0,0.1510
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,32768,1,0,31.4949
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,32,1,0,0.1588
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,12288,1,0,13.1855
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,64,1,0,0.1811
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,128,1,0,0.2367
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,256,1,0,0.3616
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,512,1,0,0.6294
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,16384,1,0,20.4859
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1024,1,0,1.2767
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1536,1,0,1.9297
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,2048,1,0,2.6327
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,3072,1,0,4.2015
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,4096,1,0,6.1921
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,6144,1,0,10.0361
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,16,1,0,0.1705
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,32,1,0,0.1889
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,8192,1,0,15.2063
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,64,1,0,0.2449
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,128,1,0,0.3591
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,10240,1,0,20.6807
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,256,1,0,0.6181
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,512,1,0,1.2202
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1024,1,0,2.4229
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,12288,1,0,26.7388
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1536,1,0,3.7542
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,2048,1,0,5.3743
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,32768,1,0,62.5823
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,3072,1,0,8.3302
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,16384,1,0,40.6785
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,4096,1,0,12.2388
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,16,1,0,0.2090
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,32,1,0,0.2617
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,6144,1,0,20.5111
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,64,1,0,0.3789
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,128,1,0,0.6213
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,256,1,0,1.1971
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,512,1,0,2.3124
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,8192,1,0,30.2529
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1024,1,0,4.9673
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1536,1,0,7.4251
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,10240,1,0,41.1373
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,2048,1,0,10.6406
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,3072,1,0,17.1489
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,12288,1,0,51.0718
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,16,1,0,0.2992
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,32,1,0,0.4140
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,64,1,0,0.6554
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,4096,1,0,24.3494
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,128,1,0,1.1985
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,256,1,0,2.2740
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,512,1,0,4.7693
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,6144,1,0,38.6481
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1024,1,0,9.8587
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,32768,1,0,121.9044
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,16384,1,0,78.2406
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1536,1,0,15.3313
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,16,1,0,0.4933
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,32,1,0,0.7309
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,64,1,0,1.2736
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,2048,1,0,21.1763
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,128,1,0,2.2849
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,256,1,0,4.6910
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,8192,1,0,57.3350
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,3072,1,0,31.9376
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,512,1,0,9.4559
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,16,1,0,0.8834
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,32,1,0,1.4259
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1024,1,0,19.5900
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,64,1,0,2.4379
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,128,1,0,4.6929
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,256,1,0,9.3061
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,4096,1,0,45.6234
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1536,1,0,28.3241
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,16,1,0,1.7355
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,32,1,0,2.7367
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,512,1,0,18.7839
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,64,1,0,5.0141
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,2048,1,0,39.2824
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,16,1,0,0.1094
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,128,1,0,9.3048
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,32,1,0,0.1152
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,64,1,0,0.1173
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,128,1,0,0.1237
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,256,1,0,0.1443
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,512,1,0,0.1830
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1024,1,0,0.2735
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1536,1,0,0.3689
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,2048,1,0,0.4721
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,3072,1,0,0.7005
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,4096,1,0,0.9616
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1024,1,0,36.1951
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,6144,1,0,1.5650
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,256,1,0,18.4635
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,8192,1,0,2.2538
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,10240,1,0,3.0096
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,12288,1,0,3.8456
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,16,1,0,0.1174
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,16384,1,0,5.7432
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,32,1,0,0.1159
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,64,1,0,0.1256
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,512,1,0,34.5496
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,128,1,0,0.1400
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,256,1,0,0.1802
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,512,1,0,0.2650
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1024,1,0,0.4367
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1536,1,0,0.6288
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,2048,1,0,0.8461
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,3072,1,0,1.3282
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,4096,1,0,1.8519
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,32768,1,0,17.0477
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,6144,1,0,3.0172
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,8192,1,0,4.3742
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,10240,1,0,5.8739
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,16,1,0,0.1216
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,32,1,0,0.1279
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,12288,1,0,7.5149
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,64,1,0,0.1439
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,128,1,0,0.1810
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,256,1,0,0.2568
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,512,1,0,0.4181
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,16384,1,0,11.4525
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1024,1,0,0.7859
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1536,1,0,1.2004
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,2048,1,0,1.6372
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,3072,1,0,2.5666
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,4096,1,0,3.6074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,6144,1,0,5.9252
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,8192,1,0,8.7927
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,16,1,0,0.1336
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,32,1,0,0.1498
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,10240,1,0,11.8062
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,32768,1,0,33.6894
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,64,1,0,0.1857
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,128,1,0,0.2597
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,12288,1,0,14.8358
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,256,1,0,0.4107
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,512,1,0,0.7558
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1024,1,0,1.5278
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1536,1,0,2.3348
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,2048,1,0,3.1954
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,16384,1,0,22.6666
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,3072,1,0,5.0473
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,4096,1,0,7.2985
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,6144,1,0,11.7061
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,16,1,0,0.1576
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,32,1,0,0.1954
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,64,1,0,0.2647
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,128,1,0,0.4088
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,8192,1,0,17.3965
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,256,1,0,0.7408
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,512,1,0,1.4725
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1024,1,0,2.9830
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,10240,1,0,23.4022
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1536,1,0,4.5941
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,2048,1,0,6.4897
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,12288,1,0,30.0425
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,32768,1,0,66.9445
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,3072,1,0,9.9886
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,16,1,0,0.2141
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,32,1,0,0.2838
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,4096,1,0,14.4679
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,64,1,0,0.4287
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,128,1,0,0.7435
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,16384,1,0,45.0792
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,256,1,0,1.4479
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,512,1,0,2.8793
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,6144,1,0,23.8153
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1024,1,0,6.0863
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1536,1,0,9.0955
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,16,1,0,0.3227
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,32,1,0,0.4661
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,64,1,0,0.7777
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,2048,1,0,12.8651
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,8192,1,0,34.5607
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,128,1,0,1.4523
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,256,1,0,2.8400
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,3072,1,0,20.4015
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,512,1,0,5.8715
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,16,1,0,0.5414
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,32,1,0,0.8540
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1024,1,0,12.0629
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,4096,1,0,28.7761
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,64,1,0,1.5275
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,128,1,0,2.8444
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1536,1,0,18.6226
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,256,1,0,5.7933
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,16,1,0,1.0051
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,32,1,0,1.6767
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,512,1,0,11.6415
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,2048,1,0,25.5635
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,64,1,0,2.9957
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,128,1,0,5.8119
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,16,1,0,0.1033
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,32,1,0,0.1100
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,64,1,0,0.1134
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,128,1,0,0.1136
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,256,1,0,0.1217
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,512,1,0,0.1500
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,256,1,0,11.4994
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1024,1,0,0.2012
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1536,1,0,0.2580
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,2048,1,0,0.3232
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1024,1,0,23.9827
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,3072,1,0,0.4510
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,4096,1,0,0.5929
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,6144,1,0,0.9235
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,8192,1,0,1.3117
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,10240,1,0,1.7524
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,12288,1,0,2.2265
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,512,1,0,23.1733
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,16,1,0,0.1114
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,32,1,0,0.1091
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,16384,1,0,3.2865
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,64,1,0,0.1144
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,128,1,0,0.1196
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,256,1,0,0.1439
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,512,1,0,0.1952
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1024,1,0,0.2991
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,32768,1,0,9.2192
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1536,1,0,0.4108
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,2048,1,0,0.5246
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,3072,1,0,0.7896
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,4096,1,0,1.0969
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,6144,1,0,1.7891
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,8192,1,0,2.5616
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,16,1,0,0.1104
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,10240,1,0,3.3996
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,12288,1,0,4.3099
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,32,1,0,0.1131
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,64,1,0,0.1192
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,16384,1,0,6.3571
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,128,1,0,0.1427
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,256,1,0,0.1913
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,512,1,0,0.2886
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1024,1,0,0.4891
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1536,1,0,0.7187
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,2048,1,0,0.9777
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,3072,1,0,1.5519
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,4096,1,0,2.1625
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,32768,1,0,18.2485
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,6144,1,0,3.4879
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,8192,1,0,4.9782
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,10240,1,0,6.6302
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,16,1,0,0.1176
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,12288,1,0,8.4163
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,32,1,0,0.1217
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,64,1,0,0.1464
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,128,1,0,0.1930
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,256,1,0,0.2810
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,16384,1,0,12.6593
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,512,1,0,0.4712
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1024,1,0,0.9215
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1536,1,0,1.4233
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,2048,1,0,1.9473
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,3072,1,0,3.0392
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,4096,1,0,4.2202
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,6144,1,0,6.8385
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,16,1,0,0.1297
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,8192,1,0,9.9871
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,32,1,0,0.1511
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,64,1,0,0.1994
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,128,1,0,0.2847
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,10240,1,0,13.2822
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,32768,1,0,36.0258
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,256,1,0,0.4659
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,512,1,0,0.8914
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,12288,1,0,16.6045
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1024,1,0,1.8360
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1536,1,0,2.8025
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,2048,1,0,3.8090
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,16384,1,0,25.0124
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,3072,1,0,5.9617
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,16,1,0,0.1587
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,4096,1,0,8.4958
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,32,1,0,0.2075
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,64,1,0,0.2922
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,128,1,0,0.4616
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,6144,1,0,13.4861
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,256,1,0,0.8773
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,512,1,0,1.7802
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1024,1,0,3.5953
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1536,1,0,5.5024
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,8192,1,0,19.7582
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,2048,1,0,7.6935
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,16,1,0,0.2258
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,32,1,0,0.3102
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,64,1,0,0.4797
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,128,1,0,0.8793
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,3072,1,0,11.7562
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,512,1,0,3.4843
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,256,1,0,1.7561
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,4096,1,0,16.8185
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1024,1,0,7.2828
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,16,1,0,0.3477
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,32,1,0,0.5183
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,64,1,0,0.9099
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1536,1,0,10.8544
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,128,1,0,1.7631
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,256,1,0,3.4524
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,2048,1,0,15.2153
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,16,1,0,0.5940
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,32,1,0,0.9893
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,512,1,0,7.0735
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,64,1,0,1.8372
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,128,1,0,3.4571
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,16,1,0,0.1053
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,32,1,0,0.1046
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,64,1,0,0.1069
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,128,1,0,0.1081
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1024,1,0,14.4091
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,256,1,0,6.9973
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,256,1,0,0.1177
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,512,1,0,0.1302
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1024,1,0,0.1700
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1536,1,0,0.2041
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,2048,1,0,0.2395
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,3072,1,0,0.3309
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,512,1,0,13.9769
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,4096,1,0,0.4207
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,6144,1,0,0.6194
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,8192,1,0,0.8474
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,10240,1,0,1.1060
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,12288,1,0,1.3883
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,16384,1,0,2.0300
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,16,1,0,0.1056
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,32,1,0,0.1053
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,64,1,0,0.1112
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,32768,1,0,5.3993
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,128,1,0,0.1137
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,256,1,0,0.1241
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,512,1,0,0.1627
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1024,1,0,0.2293
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1536,1,0,0.3036
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,2048,1,0,0.3771
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,3072,1,0,0.5371
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,4096,1,0,0.7200
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,6144,1,0,1.1439
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,8192,1,0,1.6253
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,10240,1,0,2.1480
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,12288,1,0,2.6970
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,16,1,0,0.1100
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,32,1,0,0.1114
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,16384,1,0,3.9056
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,64,1,0,0.1142
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,128,1,0,0.1218
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,256,1,0,0.1591
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,512,1,0,0.2201
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1024,1,0,0.3559
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1536,1,0,0.4955
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,2048,1,0,0.6538
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,32768,1,0,10.4280
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,3072,1,0,1.0127
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,4096,1,0,1.4071
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,6144,1,0,2.2605
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,8192,1,0,3.1694
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,10240,1,0,4.1593
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,16,1,0,0.1074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,32,1,0,0.1132
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,12288,1,0,5.2262
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,64,1,0,0.1214
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,16384,1,0,7.5592
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,128,1,0,0.1586
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,256,1,0,0.2181
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,512,1,0,0.3433
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1024,1,0,0.6177
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1536,1,0,0.9402
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,2048,1,0,1.2924
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,3072,1,0,2.0228
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,4096,1,0,2.7730
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,6144,1,0,4.4054
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,32768,1,0,20.6194
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,16,1,0,0.1172
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,8192,1,0,6.1782
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,32,1,0,0.1256
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,10240,1,0,8.1119
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,64,1,0,0.1612
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,128,1,0,0.2200
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,12288,1,0,10.2040
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,256,1,0,0.3361
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,512,1,0,0.5994
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1024,1,0,1.2340
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1536,1,0,1.8957
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,16384,1,0,15.0148
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,2048,1,0,2.5576
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,3072,1,0,3.9500
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,16,1,0,0.1300
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,32,1,0,0.1679
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,4096,1,0,5.4269
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,64,1,0,0.2251
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,128,1,0,0.3389
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,6144,1,0,8.6145
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,256,1,0,0.5944
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,512,1,0,1.2036
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1024,1,0,2.4493
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,8192,1,0,12.3627
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1536,1,0,3.7205
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,2048,1,0,5.0162
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,16,1,0,0.1755
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,32,1,0,0.2337
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,64,1,0,0.3465
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,3072,1,0,7.7435
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,128,1,0,0.5913
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,256,1,0,1.1914
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,512,1,0,2.3942
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,4096,1,0,10.8642
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1024,1,0,4.8077
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,16,1,0,0.2522
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,32,1,0,0.3656
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,64,1,0,0.6093
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1536,1,0,7.2896
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,128,1,0,1.1906
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,2048,1,0,10.0691
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,256,1,0,2.3692
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,16,1,0,0.4036
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,32,1,0,0.6476
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,512,1,0,4.6967
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,64,1,0,1.2261
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,128,1,0,2.3806
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1024,1,0,9.6486
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,16,1,0,0.1012
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,32,1,0,0.1013
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,256,1,0,4.6624
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,64,1,0,0.1082
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,128,1,0,0.1073
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,256,1,0,0.1134
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,512,1,0,0.1237
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1024,1,0,0.1533
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1536,1,0,0.1811
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,512,1,0,9.4398
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,2048,1,0,0.2129
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,3072,1,0,0.2709
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,4096,1,0,0.3251
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,6144,1,0,0.4628
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,8192,1,0,0.6261
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,10240,1,0,0.8038
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,12288,1,0,0.9906
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,16384,1,0,1.3877
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,16,1,0,0.1047
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,32,1,0,0.1075
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,64,1,0,0.1070
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,32768,1,0,3.4503
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,128,1,0,0.1113
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,256,1,0,0.1217
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,512,1,0,0.1455
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1024,1,0,0.1996
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1536,1,0,0.2504
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,2048,1,0,0.2978
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,3072,1,0,0.4145
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,4096,1,0,0.5483
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,6144,1,0,0.8378
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,8192,1,0,1.1516
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,10240,1,0,1.4899
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,12288,1,0,1.8465
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,16,1,0,0.1052
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,16384,1,0,2.6347
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,32,1,0,0.1072
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,64,1,0,0.1132
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,128,1,0,0.1175
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,256,1,0,0.1442
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,512,1,0,0.1938
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,32768,1,0,6.5915
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1024,1,0,0.2863
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1536,1,0,0.3874
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,2048,1,0,0.5051
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,3072,1,0,0.7574
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,4096,1,0,1.0253
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,6144,1,0,1.6064
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,8192,1,0,2.2260
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,10240,1,0,2.9000
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,16,1,0,0.1095
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,12288,1,0,3.5922
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,32,1,0,0.1112
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,64,1,0,0.1189
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,16384,1,0,5.0812
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,128,1,0,0.1391
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,256,1,0,0.1899
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,512,1,0,0.2807
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1024,1,0,0.4823
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1536,1,0,0.7176
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,2048,1,0,0.9579
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,3072,1,0,1.4721
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,32768,1,0,12.7712
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,4096,1,0,2.0112
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,6144,1,0,3.1568
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,8192,1,0,4.3598
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,10240,1,0,5.6206
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,16,1,0,0.1138
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,12288,1,0,6.9645
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,32,1,0,0.1176
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,64,1,0,0.1401
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,128,1,0,0.1867
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,256,1,0,0.2772
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,512,1,0,0.4719
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,16384,1,0,9.8793
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1024,1,0,0.9217
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1536,1,0,1.4036
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,2048,1,0,1.9007
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,3072,1,0,2.9254
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,4096,1,0,3.9575
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,16,1,0,0.1207
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,32,1,0,0.1421
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,6144,1,0,6.1455
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,64,1,0,0.1896
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,128,1,0,0.2781
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,256,1,0,0.4638
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,8192,1,0,8.5020
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,512,1,0,0.9044
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1024,1,0,1.8400
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1536,1,0,2.8000
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,2048,1,0,3.7540
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,16,1,0,0.1489
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,32,1,0,0.1961
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,64,1,0,0.2838
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,3072,1,0,5.7020
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,128,1,0,0.4674
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,4096,1,0,7.7565
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,256,1,0,0.8968
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,512,1,0,1.8071
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1024,1,0,3.6387
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,16,1,0,0.2023
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1536,1,0,5.4716
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,32,1,0,0.2920
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,64,1,0,0.4768
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,128,1,0,0.8967
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,2048,1,0,7.3453
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,256,1,0,1.7976
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,512,1,0,3.5818
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,16,1,0,0.3115
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,32,1,0,0.4932
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,64,1,0,0.9160
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1024,1,0,7.1282
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,128,1,0,1.7980
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,256,1,0,3.5587
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,16,1,0,0.1011
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,32,1,0,0.1021
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,64,1,0,0.1037
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,512,1,0,7.0291
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,128,1,0,0.1093
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,256,1,0,0.1115
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,512,1,0,0.1236
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1024,1,0,0.1488
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1536,1,0,0.1733
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,2048,1,0,0.1990
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,3072,1,0,0.2511
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,4096,1,0,0.2982
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,6144,1,0,0.4005
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,8192,1,0,0.5072
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,10240,1,0,0.6348
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,12288,1,0,0.7799
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,16384,1,0,1.0927
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,16,1,0,0.1028
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,32768,1,0,2.4822
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,64,1,0,0.1070
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,128,1,0,0.1094
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,256,1,0,0.1212
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,512,1,0,0.1425
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1024,1,0,0.1861
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1536,1,0,0.2324
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,32,1,0,0.1037
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,2048,1,0,0.2731
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,3072,1,0,0.3606
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,4096,1,0,0.4556
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,6144,1,0,0.6873
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,8192,1,0,0.9401
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,10240,1,0,1.2032
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,12288,1,0,1.4630
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,16,1,0,0.1046
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,16384,1,0,2.0078
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,32,1,0,0.1062
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,32768,1,0,4.6899
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,64,1,0,0.1115
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,128,1,0,0.1154
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,256,1,0,0.1400
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,512,1,0,0.1784
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1024,1,0,0.2602
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1536,1,0,0.3413
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,2048,1,0,0.4312
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,3072,1,0,0.6390
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,4096,1,0,0.8628
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,6144,1,0,1.3139
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,8192,1,0,1.7668
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,10240,1,0,2.2654
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,16,1,0,0.1095
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,32,1,0,0.1115
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,12288,1,0,2.7803
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,16384,1,0,3.8644
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,64,1,0,0.1156
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,128,1,0,0.1344
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,256,1,0,0.1766
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,512,1,0,0.2541
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1024,1,0,0.4191
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,32768,1,0,9.0044
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1536,1,0,0.6135
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,2048,1,0,0.8170
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,3072,1,0,1.2311
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,4096,1,0,1.6432
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,6144,1,0,2.5340
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,8192,1,0,3.4598
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,16,1,0,0.1115
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,10240,1,0,4.4202
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,32,1,0,0.1159
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,64,1,0,0.1358
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,128,1,0,0.1745
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,12288,1,0,5.4113
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,256,1,0,0.2524
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,512,1,0,0.4127
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1024,1,0,0.7950
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,16384,1,0,7.4892
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1536,1,0,1.1907
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,2048,1,0,1.5793
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,3072,1,0,2.4045
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,4096,1,0,3.2491
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,16,1,0,0.1156
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,32,1,0,0.1350
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,6144,1,0,4.9712
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,64,1,0,0.1736
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,128,1,0,0.2486
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,8192,1,0,6.7682
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,256,1,0,0.4087
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,512,1,0,0.7858
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1024,1,0,1.5463
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1536,1,0,2.3354
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,2048,1,0,3.1272
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,16,1,0,0.1380
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,32,1,0,0.1765
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,3072,1,0,4.7412
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,64,1,0,0.2514
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,128,1,0,0.4103
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,256,1,0,0.7780
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,4096,1,0,6.3744
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,512,1,0,1.5245
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1024,1,0,3.0730
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,16,1,0,0.1832
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,32,1,0,0.2573
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1536,1,0,4.6141
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,64,1,0,0.4157
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,128,1,0,0.7793
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,2048,1,0,6.1591
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,256,1,0,1.5165
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,512,1,0,3.0368
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,16,1,0,0.2653
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,32,1,0,0.4253
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,64,1,0,0.7872
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1024,1,0,6.0456
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,128,1,0,1.5138
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,256,1,0,3.0290
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,16,1,0,0.0930
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,32,1,0,0.0990
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,64,1,0,0.1072
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,128,1,0,0.1073
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,512,1,0,6.0024
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,256,1,0,0.1153
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,512,1,0,0.1231
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1024,1,0,0.1454
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1536,1,0,0.1717
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,2048,1,0,0.1945
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,3072,1,0,0.2423
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,4096,1,0,0.2858
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,6144,1,0,0.3815
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,8192,1,0,0.4822
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,10240,1,0,0.5924
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,12288,1,0,0.6990
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,16384,1,0,0.9193
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,32768,1,0,2.0146
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,16,1,0,0.1032
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,32,1,0,0.1031
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,64,1,0,0.1073
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,128,1,0,0.1118
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,256,1,0,0.1194
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,512,1,0,0.1421
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1024,1,0,0.1814
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1536,1,0,0.2240
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,2048,1,0,0.2609
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,3072,1,0,0.3426
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,4096,1,0,0.4311
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,6144,1,0,0.6223
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,8192,1,0,0.8178
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,10240,1,0,1.0254
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,12288,1,0,1.2511
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,16,1,0,0.1039
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,16384,1,0,1.7115
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,32,1,0,0.1071
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,64,1,0,0.1116
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,32768,1,0,3.7108
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,128,1,0,0.1153
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,256,1,0,0.1374
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,512,1,0,0.1762
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1024,1,0,0.2486
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1536,1,0,0.3241
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,2048,1,0,0.4066
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,3072,1,0,0.5837
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,4096,1,0,0.7651
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,6144,1,0,1.1578
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,8192,1,0,1.5592
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,10240,1,0,1.9739
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,12288,1,0,2.3880
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,16,1,0,0.1072
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,16384,1,0,3.2275
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,32,1,0,0.1116
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,64,1,0,0.1152
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,128,1,0,0.1339
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,256,1,0,0.1730
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,32768,1,0,7.0832
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,512,1,0,0.2426
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1024,1,0,0.3936
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1536,1,0,0.5641
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,2048,1,0,0.7379
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,3072,1,0,1.1102
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,4096,1,0,1.4826
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,6144,1,0,2.2359
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,8192,1,0,2.9918
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,10240,1,0,3.7683
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,16,1,0,0.1090
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,12288,1,0,4.5752
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,32,1,0,0.1153
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,64,1,0,0.1343
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,16384,1,0,6.2676
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,128,1,0,0.1705
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,256,1,0,0.2389
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,512,1,0,0.3856
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1024,1,0,0.7280
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1536,1,0,1.0863
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,2048,1,0,1.4420
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,3072,1,0,2.1547
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,16,1,0,0.1157
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,32,1,0,0.1350
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,4096,1,0,2.8588
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,64,1,0,0.1701
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,6144,1,0,4.3329
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,128,1,0,0.2365
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,8192,1,0,5.8632
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,256,1,0,0.3839
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,512,1,0,0.7198
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1024,1,0,1.4169
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1536,1,0,2.1128
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,2048,1,0,2.7952
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,16,1,0,0.1339
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,32,1,0,0.1688
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,3072,1,0,4.2034
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,64,1,0,0.2378
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,128,1,0,0.3823
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,256,1,0,0.7183
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,4096,1,0,5.6480
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,512,1,0,1.4048
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1024,1,0,2.7581
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,16,1,0,0.1720
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,32,1,0,0.2407
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1536,1,0,4.1350
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,64,1,0,0.3841
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,2048,1,0,5.5208
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,128,1,0,0.7184
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,256,1,0,1.3995
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,512,1,0,2.7446
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,16,1,0,0.2448
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,32,1,0,0.3892
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,64,1,0,0.7229
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1024,1,0,5.4681
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,256,1,0,2.7325
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,128,1,0,1.4003
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,16,1,0,0.0912
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,32,1,0,0.1033
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,64,1,0,0.0988
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,128,1,0,0.1073
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,512,1,0,5.4307
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,256,1,0,0.1098
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,512,1,0,0.1179
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1024,1,0,0.1422
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1536,1,0,0.1665
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,2048,1,0,0.1927
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,3072,1,0,0.2411
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,4096,1,0,0.2824
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,6144,1,0,0.3734
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,8192,1,0,0.4715
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,10240,1,0,0.5777
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,12288,1,0,0.6804
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,16384,1,0,0.8933
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,32768,1,0,1.7483
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,16,1,0,0.0952
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,32,1,0,0.0988
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,64,1,0,0.1073
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,128,1,0,0.1072
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,256,1,0,0.1156
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,512,1,0,0.1360
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1024,1,0,0.1784
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1536,1,0,0.2204
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,2048,1,0,0.2572
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,3072,1,0,0.3342
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,4096,1,0,0.4222
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,6144,1,0,0.6050
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,8192,1,0,0.7939
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,10240,1,0,0.9838
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,12288,1,0,1.1675
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,16384,1,0,1.5468
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,16,1,0,0.0988
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,32,1,0,0.1032
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,32768,1,0,3.2460
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,64,1,0,0.1071
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,128,1,0,0.1155
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,256,1,0,0.1339
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,512,1,0,0.1730
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1024,1,0,0.2448
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1536,1,0,0.3150
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,2048,1,0,0.3965
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,3072,1,0,0.5662
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,4096,1,0,0.7426
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,6144,1,0,1.0905
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,8192,1,0,1.4443
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,10240,1,0,1.8124
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,12288,1,0,2.1885
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,16,1,0,0.1031
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,32,1,0,0.1094
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,16384,1,0,2.9365
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,64,1,0,0.1114
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,128,1,0,0.1312
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,256,1,0,0.1699
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,512,1,0,0.2374
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,32768,1,0,6.1185
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1024,1,0,0.3848
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1536,1,0,0.5475
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,2048,1,0,0.7155
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,3072,1,0,1.0545
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,4096,1,0,1.3897
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,6144,1,0,2.0961
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,8192,1,0,2.7892
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,10240,1,0,3.4890
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,16,1,0,0.1069
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,32,1,0,0.1131
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,12288,1,0,4.1958
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,64,1,0,0.1320
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,16384,1,0,5.6333
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,128,1,0,0.1669
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,256,1,0,0.2348
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,512,1,0,0.3761
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1024,1,0,0.7054
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1536,1,0,1.0350
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,2048,1,0,1.3670
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,3072,1,0,2.0464
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,16,1,0,0.1156
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,32,1,0,0.1298
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,4096,1,0,2.7116
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,64,1,0,0.1656
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,6144,1,0,4.0437
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,128,1,0,0.2319
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,256,1,0,0.3750
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,8192,1,0,5.3937
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,512,1,0,0.6983
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1024,1,0,1.3527
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1536,1,0,2.0235
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,2048,1,0,2.6695
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,16,1,0,0.1297
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,32,1,0,0.1651
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,64,1,0,0.2334
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,3072,1,0,3.9649
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,4096,1,0,5.2801
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,128,1,0,0.3697
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,256,1,0,0.6959
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,512,1,0,1.3470
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1024,1,0,2.6448
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,16,1,0,0.1659
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1536,1,0,3.9198
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,32,1,0,0.2324
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,64,1,0,0.3720
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,2048,1,0,5.2069
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,128,1,0,0.6907
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,256,1,0,1.3423
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,512,1,0,2.6345
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,16,1,0,0.2353
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,32,1,0,0.3729
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1024,1,0,5.1735
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,64,1,0,0.6946
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,128,1,0,1.3479
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,256,1,0,2.6251
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,16,1,0,0.1410
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,32,1,0,0.1401
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,64,1,0,0.1502
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,512,1,0,5.1576
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,128,1,0,0.1644
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,256,1,0,0.1954
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,512,1,0,0.2770
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,1024,1,0,0.4553
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,1536,1,0,0.6390
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,2048,1,0,0.8439
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,3072,1,0,1.3077
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,4096,1,0,1.8084
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,6144,1,0,2.8654
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,8192,1,0,4.0815
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,10240,1,0,5.4498
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,12288,1,0,6.9666
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,16,1,0,0.1462
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,32,1,0,0.1514
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,64,1,0,0.1692
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,16384,1,0,10.6549
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,128,1,0,0.1952
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,256,1,0,0.2728
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,512,1,0,0.4390
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,1024,1,0,0.7982
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,1536,1,0,1.2099
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,2048,1,0,1.6376
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,3072,1,0,2.5181
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,4096,1,0,3.4694
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,6144,1,0,5.5806
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,8192,1,0,8.2372
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,32768,1,0,30.7242
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,10240,1,0,10.9990
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,16,1,0,0.1562
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,32,1,0,0.1708
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,12288,1,0,13.7771
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,64,1,0,0.2013
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,128,1,0,0.2729
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,256,1,0,0.4343
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,512,1,0,0.7730
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,1024,1,0,1.5509
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,16384,1,0,21.1123
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,1536,1,0,2.3292
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,2048,1,0,3.1425
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,3072,1,0,4.8962
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,4096,1,0,7.0309
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,6144,1,0,11.0730
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,8192,1,0,16.3007
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,16,1,0,0.1829
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,32,1,0,0.2080
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,64,1,0,0.2832
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,128,1,0,0.4354
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,256,1,0,0.7583
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,10240,1,0,21.8386
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,512,1,0,1.5052
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,1024,1,0,2.9775
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,12288,1,0,27.9772
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,1536,1,0,4.5408
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,32768,1,0,61.1626
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,2048,1,0,6.4018
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,3072,1,0,9.7234
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,16384,1,0,41.9703
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,4096,1,0,13.9469
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,16,1,0,0.2282
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,32,1,0,0.2992
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,6144,1,0,22.6195
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,64,1,0,0.4550
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,128,1,0,0.7699
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,256,1,0,1.4812
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,512,1,0,2.8918
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,8192,1,0,32.4511
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,1024,1,0,6.0724
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,1536,1,0,9.0119
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,10240,1,0,43.4842
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,2048,1,0,12.6985
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,12288,1,0,53.5588
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,3072,1,0,19.9205
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,16,1,0,0.3372
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,32,1,0,0.4870
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,64,1,0,0.8061
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,128,1,0,1.5044
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,4096,1,0,27.7900
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,256,1,0,2.8464
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,512,1,0,5.8966
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,32768,1,0,118.9273
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,16384,1,0,80.8087
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,1024,1,0,12.0624
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,6144,1,0,42.8661
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,1536,1,0,18.5480
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,16,1,0,0.5652
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,32,1,0,0.8742
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,64,1,0,1.5798
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,2048,1,0,25.2663
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,128,1,0,2.8938
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,256,1,0,5.8232
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,8192,1,0,61.8398
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,512,1,0,11.7276
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,3072,1,0,37.5079
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,128,16,1,0,1.0284
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,128,32,1,0,1.7097
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,128,64,1,0,3.0420
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,1024,1,0,24.0117
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,128,128,1,0,5.9190
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,4096,1,0,52.4592
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,128,256,1,0,11.5559
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,1536,1,0,34.7377
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,256,16,1,0,2.0149
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,256,32,1,0,3.2975
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,128,512,1,0,23.3393
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,256,64,1,0,6.2125
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,2048,1,0,47.4719
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,16,1,0,0.1163
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,256,128,1,0,11.7539
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,32,1,0,0.1213
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,64,1,0,0.1237
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,128,1,0,0.1327
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,256,1,0,0.1565
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,512,1,0,0.2014
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,1024,1,0,0.3080
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,1536,1,0,0.4161
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,256,256,1,0,23.0045
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,2048,1,0,0.5336
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,128,1024,1,0,44.9752
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,3072,1,0,0.7843
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,4096,1,0,1.0616
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,6144,1,0,1.6878
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,8192,1,0,2.3880
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,10240,1,0,3.1461
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,12288,1,0,3.9865
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,16,1,0,0.1213
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,32,1,0,0.1238
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,16384,1,0,5.8867
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,64,1,0,0.1337
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,128,1,0,0.1562
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,256,1,0,0.1995
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,256,512,1,0,43.6755
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,512,1,0,0.3000
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,1024,1,0,0.5053
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,1536,1,0,0.7311
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,32768,1,0,16.6413
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,2048,1,0,0.9701
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,3072,1,0,1.5014
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,4096,1,0,2.0630
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,6144,1,0,3.2700
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,8192,1,0,4.6466
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,16,1,0,0.1299
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,32,1,0,0.1380
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,10240,1,0,6.1585
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,64,1,0,0.1575
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,12288,1,0,7.8093
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,128,1,0,0.2012
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,256,1,0,0.2937
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,512,1,0,0.4908
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,16384,1,0,11.7618
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,1024,1,0,0.9225
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,1536,1,0,1.4013
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,2048,1,0,1.8936
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,3072,1,0,2.9164
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,4096,1,0,4.0310
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,6144,1,0,6.4414
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,8192,1,0,9.3411
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,10240,1,0,12.3814
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,32768,1,0,32.9567
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,16,1,0,0.1422
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,32,1,0,0.1624
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,64,1,0,0.2075
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,12288,1,0,15.4263
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,128,1,0,0.2965
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,256,1,0,0.4852
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,512,1,0,0.9002
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,1024,1,0,1.8067
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,1536,1,0,2.7317
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,16384,1,0,23.3015
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,2048,1,0,3.7028
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,3072,1,0,5.7526
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,4096,1,0,8.1505
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,6144,1,0,12.7517
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,16,1,0,0.1727
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,32,1,0,0.2123
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,8192,1,0,18.5288
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,64,1,0,0.3062
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,128,1,0,0.4879
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,256,1,0,0.8891
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,512,1,0,1.7627
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,10240,1,0,24.5731
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,1024,1,0,3.5423
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,1536,1,0,5.3951
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,2048,1,0,7.5129
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,12288,1,0,31.2466
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,32768,1,0,65.5128
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,3072,1,0,11.3941
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,16,1,0,0.2320
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,32,1,0,0.3224
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,64,1,0,0.5072
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,128,1,0,0.8964
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,4096,1,0,16.1745
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,256,1,0,1.7401
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,16384,1,0,46.3445
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,512,1,0,3.4540
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,6144,1,0,25.8914
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,1024,1,0,7.1889
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,1536,1,0,10.6858
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,16,1,0,0.3588
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,32,1,0,0.5388
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,64,1,0,0.9336
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,8192,1,0,36.8611
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,128,1,0,1.7605
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,2048,1,0,14.9079
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,256,1,0,3.4058
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,3072,1,0,23.2549
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,512,1,0,7.0202
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,128,16,1,0,0.6159
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,128,32,1,0,0.9999
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,1024,1,0,14.2675
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,4096,1,0,32.1515
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,128,64,1,0,1.8350
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,128,128,1,0,3.4576
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,128,256,1,0,6.9299
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,256,16,1,0,1.1520
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,1536,1,0,21.8282
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,256,32,1,0,1.9652
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,128,512,1,0,13.9349
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,256,64,1,0,3.6051
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,2048,1,0,29.6676
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,256,128,1,0,7.0259
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,16,1,0,0.1139
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,32,1,0,0.1155
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,64,1,0,0.1157
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,128,1,0,0.1205
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,256,1,0,0.1319
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,512,1,0,0.1613
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,1024,1,0,0.2186
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,1536,1,0,0.2830
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,256,256,1,0,13.7453
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,2048,1,0,0.3506
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,3072,1,0,0.4907
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,128,1024,1,0,28.3810
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,4096,1,0,0.6386
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,6144,1,0,0.9823
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,8192,1,0,1.3740
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,10240,1,0,1.8175
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,12288,1,0,2.2940
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,16384,1,0,3.3443
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,16,1,0,0.1133
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,256,512,1,0,27.7636
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,32,1,0,0.1185
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,64,1,0,0.1197
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,128,1,0,0.1282
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,32768,1,0,8.9833
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,256,1,0,0.1583
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,512,1,0,0.2142
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,1024,1,0,0.3328
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,1536,1,0,0.4599
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,2048,1,0,0.5865
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,3072,1,0,0.8759
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,4096,1,0,1.1995
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,6144,1,0,1.9151
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,8192,1,0,2.6958
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,16,1,0,0.1175
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,10240,1,0,3.5430
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,12288,1,0,4.4482
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,16384,1,0,6.5013
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,32,1,0,0.1216
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,64,1,0,0.1278
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,128,1,0,0.1567
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,256,1,0,0.2112
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,512,1,0,0.3237
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,1024,1,0,0.5581
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,1536,1,0,0.8226
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,2048,1,0,1.1074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,3072,1,0,1.7249
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,4096,1,0,2.3726
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,32768,1,0,17.8300
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,6144,1,0,3.7466
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,8192,1,0,5.2551
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,10240,1,0,6.9114
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,16,1,0,0.1236
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,32,1,0,0.1299
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,12288,1,0,8.7242
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,64,1,0,0.1577
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,128,1,0,0.2139
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,16384,1,0,12.9691
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,256,1,0,0.3185
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,512,1,0,0.5447
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,1024,1,0,1.0607
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,1536,1,0,1.6292
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,2048,1,0,2.2004
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,3072,1,0,3.3875
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,4096,1,0,4.6398
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,6144,1,0,7.3503
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,16,1,0,0.1360
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,32,1,0,0.1626
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,8192,1,0,10.5329
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,64,1,0,0.2195
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,128,1,0,0.3209
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,32768,1,0,35.2761
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,10240,1,0,13.8716
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,256,1,0,0.5389
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,512,1,0,1.0394
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,1024,1,0,2.1180
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,12288,1,0,17.2238
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,1536,1,0,3.1981
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,2048,1,0,4.3133
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,3072,1,0,6.6627
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,16,1,0,0.1713
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,16384,1,0,25.6655
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,4096,1,0,9.3400
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,32,1,0,0.2277
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,64,1,0,0.3283
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,128,1,0,0.5407
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,256,1,0,1.0223
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,512,1,0,2.0705
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,6144,1,0,14.5079
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,1024,1,0,4.1505
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,1536,1,0,6.3015
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,16,1,0,0.2447
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,8192,1,0,20.8588
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,2048,1,0,8.7166
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,32,1,0,0.3453
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,64,1,0,0.5582
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,128,1,0,1.0340
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,3072,1,0,13.1487
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,256,1,0,2.0477
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,512,1,0,4.0638
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,4096,1,0,18.5008
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,128,16,1,0,0.3834
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,1024,1,0,8.3915
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,128,32,1,0,0.5924
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,128,64,1,0,1.0731
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,128,128,1,0,2.0689
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,1536,1,0,12.4677
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,128,256,1,0,4.0222
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,256,16,1,0,0.6712
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,2048,1,0,17.2564
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,128,512,1,0,8.2274
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,256,32,1,0,1.1365
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,256,64,1,0,2.1413
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,256,128,1,0,4.0718
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,32,1,0,0.1139
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,128,1024,1,0,16.6117
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,16,1,0,0.1061
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,64,1,0,0.1135
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,256,256,1,0,8.1330
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,256,1,0,0.1235
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,128,1,0,0.1173
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,512,1,0,0.1359
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,1024,1,0,0.1789
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,1536,1,0,0.2196
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,2048,1,0,0.2573
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,3072,1,0,0.3480
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,4096,1,0,0.4390
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,6144,1,0,0.6386
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,256,512,1,0,16.2802
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,8192,1,0,0.8711
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,10240,1,0,1.1309
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,12288,1,0,1.4124
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,16384,1,0,2.0416
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,16,1,0,0.1119
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,32,1,0,0.1132
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,64,1,0,0.1159
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,32768,1,0,5.2562
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,128,1,0,0.1196
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,256,1,0,0.1336
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,512,1,0,0.1746
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,1024,1,0,0.2459
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,1536,1,0,0.3266
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,2048,1,0,0.4050
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,3072,1,0,0.5762
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,4096,1,0,0.7684
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,6144,1,0,1.2052
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,8192,1,0,1.6885
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,10240,1,0,2.2125
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,12288,1,0,2.7646
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,16384,1,0,3.9566
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,16,1,0,0.1156
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,32,1,0,0.1176
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,64,1,0,0.1194
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,128,1,0,0.1317
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,256,1,0,0.1725
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,512,1,0,0.2400
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,1024,1,0,0.3869
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,1536,1,0,0.5454
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,32768,1,0,10.1955
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,2048,1,0,0.7150
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,3072,1,0,1.1000
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,4096,1,0,1.5128
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,6144,1,0,2.3819
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,8192,1,0,3.3077
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,10240,1,0,4.3001
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,16,1,0,0.1176
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,12288,1,0,5.3734
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,32,1,0,0.1214
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,64,1,0,0.1320
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,16384,1,0,7.7069
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,128,1,0,0.1688
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,256,1,0,0.2374
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,512,1,0,0.3784
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,1024,1,0,0.6879
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,1536,1,0,1.0430
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,2048,1,0,1.4221
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,3072,1,0,2.1937
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,4096,1,0,2.9867
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,32768,1,0,20.1949
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,6144,1,0,4.6598
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,8192,1,0,6.4566
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,16,1,0,0.1239
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,10240,1,0,8.4094
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,32,1,0,0.1351
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,64,1,0,0.1714
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,12288,1,0,10.5105
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,128,1,0,0.2385
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,256,1,0,0.3736
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,512,1,0,0.6724
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,1024,1,0,1.3700
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,1536,1,0,2.0953
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,16384,1,0,15.3426
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,2048,1,0,2.8147
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,3072,1,0,4.2997
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,16,1,0,0.1378
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,4096,1,0,5.8454
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,32,1,0,0.1772
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,64,1,0,0.2465
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,128,1,0,0.3768
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,256,1,0,0.6671
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,6144,1,0,9.1286
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,512,1,0,1.3488
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,8192,1,0,12.9245
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,1024,1,0,2.7283
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,1536,1,0,4.1185
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,2048,1,0,5.5208
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,16,1,0,0.1855
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,32,1,0,0.2523
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,64,1,0,0.3843
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,3072,1,0,8.4445
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,128,1,0,0.6714
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,256,1,0,1.3359
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,4096,1,0,11.7091
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,512,1,0,2.6831
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,1024,1,0,5.3603
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,128,16,1,0,0.2724
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,128,32,1,0,0.4015
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,1536,1,0,8.0836
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,128,64,1,0,0.6905
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,128,128,1,0,1.3476
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,128,256,1,0,2.6580
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,2048,1,0,11.0934
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,128,512,1,0,5.2763
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,256,16,1,0,0.4413
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,256,32,1,0,0.7217
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,256,64,1,0,1.3846
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,128,1024,1,0,10.7565
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,16,1,0,0.1031
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,256,128,1,0,2.6854
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,32,1,0,0.1114
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,256,256,1,0,5.2293
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,64,1,0,0.1140
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,128,1,0,0.1164
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,256,1,0,0.1216
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,512,1,0,0.1280
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,1024,1,0,0.1586
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,1536,1,0,0.1890
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,2048,1,0,0.2210
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,256,512,1,0,10.6030
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,3072,1,0,0.2813
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,4096,1,0,0.3370
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,6144,1,0,0.4758
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,8192,1,0,0.6333
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,10240,1,0,0.8103
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,12288,1,0,0.9879
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,16,1,0,0.1074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,16384,1,0,1.3810
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,32,1,0,0.1115
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,64,1,0,0.1155
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,32768,1,0,3.3515
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,128,1,0,0.1197
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,256,1,0,0.1278
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,512,1,0,0.1544
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,1024,1,0,0.2098
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,1536,1,0,0.2644
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,2048,1,0,0.3156
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,3072,1,0,0.4342
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,4096,1,0,0.5692
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,6144,1,0,0.8620
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,8192,1,0,1.1783
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,10240,1,0,1.5176
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,12288,1,0,1.8786
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,16,1,0,0.1145
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,32,1,0,0.1134
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,64,1,0,0.1175
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,16384,1,0,2.6452
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,128,1,0,0.1238
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,32768,1,0,6.4418
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,256,1,0,0.1524
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,512,1,0,0.2035
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,1024,1,0,0.3050
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,1536,1,0,0.4137
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,2048,1,0,0.5346
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,3072,1,0,0.7977
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,4096,1,0,1.0734
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,6144,1,0,1.6659
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,16,1,0,0.1134
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,10240,1,0,2.9641
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,12288,1,0,3.6652
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,16384,1,0,5.1442
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,8192,1,0,2.2924
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,32,1,0,0.1181
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,64,1,0,0.1256
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,128,1,0,0.1503
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,256,1,0,0.2005
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,512,1,0,0.2997
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,1024,1,0,0.5184
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,1536,1,0,0.7656
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,32768,1,0,12.5347
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,2048,1,0,1.0204
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,3072,1,0,1.5632
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,4096,1,0,2.1183
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,6144,1,0,3.2857
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,16,1,0,0.1174
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,8192,1,0,4.4960
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,10240,1,0,5.7731
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,32,1,0,0.1237
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,64,1,0,0.1483
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,12288,1,0,7.1155
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,128,1,0,0.1993
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,256,1,0,0.2966
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,16384,1,0,10.0314
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,512,1,0,0.5074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,1024,1,0,0.9931
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,1536,1,0,1.5043
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,2048,1,0,2.0230
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,3072,1,0,3.0975
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,16,1,0,0.1260
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,4096,1,0,4.1719
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,32,1,0,0.1513
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,6144,1,0,6.4122
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,64,1,0,0.2016
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,128,1,0,0.2977
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,256,1,0,0.5034
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,512,1,0,0.9791
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,8192,1,0,8.7893
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,1024,1,0,1.9784
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,1536,1,0,2.9987
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,16,1,0,0.1584
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,2048,1,0,4.0049
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,32,1,0,0.2072
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,3072,1,0,6.0480
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,64,1,0,0.3039
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,128,1,0,0.5081
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,256,1,0,0.9731
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,4096,1,0,8.1736
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,512,1,0,1.9551
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,1024,1,0,3.9146
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,128,16,1,0,0.2168
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,128,32,1,0,0.3120
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,128,64,1,0,0.5136
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,1536,1,0,5.8757
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,128,128,1,0,0.9768
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,128,256,1,0,1.9460
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,2048,1,0,7.8506
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,128,512,1,0,3.8707
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,256,16,1,0,0.3303
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,256,32,1,0,0.5338
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,256,64,1,0,0.9941
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,128,1024,1,0,7.6852
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,256,128,1,0,1.9550
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,16,1,0,0.1009
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,256,256,1,0,3.8495
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,32,1,0,0.1052
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,64,1,0,0.1095
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,128,1,0,0.1155
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,256,1,0,0.1197
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,256,512,1,0,7.5991
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,512,1,0,0.1278
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,1024,1,0,0.1525
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,1536,1,0,0.1790
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,2048,1,0,0.2031
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,3072,1,0,0.2581
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,4096,1,0,0.3038
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,6144,1,0,0.4052
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,8192,1,0,0.5133
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,10240,1,0,0.6381
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,12288,1,0,0.7814
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,16384,1,0,1.0797
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,32768,1,0,2.4074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,16,1,0,0.1113
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,32,1,0,0.1096
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,64,1,0,0.1131
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,128,1,0,0.1176
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,256,1,0,0.1260
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,512,1,0,0.1466
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,1024,1,0,0.1919
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,1536,1,0,0.2408
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,2048,1,0,0.2812
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,3072,1,0,0.3713
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,4096,1,0,0.4699
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,6144,1,0,0.7046
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,8192,1,0,0.9507
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,10240,1,0,1.2074
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,12288,1,0,1.4633
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,16,1,0,0.1133
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,32,1,0,0.1117
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,16384,1,0,2.0070
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,64,1,0,0.1178
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,128,1,0,0.1222
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,32768,1,0,4.5754
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,256,1,0,0.1440
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,512,1,0,0.1867
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,1024,1,0,0.2720
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,1536,1,0,0.3558
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,2048,1,0,0.4473
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,3072,1,0,0.6624
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,4096,1,0,0.8847
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,6144,1,0,1.3349
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,8192,1,0,1.7977
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,10240,1,0,2.2922
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,12288,1,0,2.8039
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,16384,1,0,3.8769
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,16,1,0,0.1128
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,32,1,0,0.1156
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,64,1,0,0.1229
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,128,1,0,0.1422
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,256,1,0,0.1850
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,32768,1,0,8.8519
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,512,1,0,0.2648
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,1024,1,0,0.4369
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,1536,1,0,0.6419
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,2048,1,0,0.8521
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,3072,1,0,1.2735
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,4096,1,0,1.6933
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,6144,1,0,2.5971
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,8192,1,0,3.5254
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,16,1,0,0.1176
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,10240,1,0,4.4810
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,32,1,0,0.1217
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,64,1,0,0.1418
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,128,1,0,0.1821
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,12288,1,0,5.4832
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,16384,1,0,7.5594
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,256,1,0,0.2629
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,512,1,0,0.4333
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,1024,1,0,0.8358
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,1536,1,0,1.2422
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,2048,1,0,1.6439
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,3072,1,0,2.4878
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,4096,1,0,3.3481
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,16,1,0,0.1237
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,6144,1,0,5.1046
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,32,1,0,0.1422
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,64,1,0,0.1817
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,128,1,0,0.2623
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,256,1,0,0.4307
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,512,1,0,0.8215
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,8192,1,0,6.9061
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,1024,1,0,1.6152
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,1536,1,0,2.4406
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,2048,1,0,3.2563
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,16,1,0,0.1463
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,32,1,0,0.1842
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,64,1,0,0.2638
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,3072,1,0,4.9128
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,128,1,0,0.4316
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,4096,1,0,6.5766
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,256,1,0,0.8180
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,512,1,0,1.6008
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,1024,1,0,3.2069
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,128,16,1,0,0.1898
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,128,32,1,0,0.2692
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,1536,1,0,4.8136
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,128,64,1,0,0.4376
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,128,128,1,0,0.8220
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,2048,1,0,6.4193
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,128,256,1,0,1.5954
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,128,512,1,0,3.1842
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,256,16,1,0,0.2772
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,256,32,1,0,0.4442
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,128,1024,1,0,6.3228
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,256,64,1,0,0.8317
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,256,128,1,0,1.5957
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,16,1,0,0.1052
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,256,256,1,0,3.1672
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,32,1,0,0.1030
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,64,1,0,0.1143
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,128,1,0,0.1123
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,256,512,1,0,6.2814
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,256,1,0,0.1198
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,512,1,0,0.1278
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,1024,1,0,0.1523
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,1536,1,0,0.1748
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,2048,1,0,0.1969
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,3072,1,0,0.2445
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,4096,1,0,0.2874
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,6144,1,0,0.3802
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,8192,1,0,0.4809
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,10240,1,0,0.5873
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,12288,1,0,0.6919
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,16384,1,0,0.9089
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,16,1,0,0.1072
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,32768,1,0,1.9507
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,32,1,0,0.1093
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,64,1,0,0.1153
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,128,1,0,0.1157
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,256,1,0,0.1260
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,512,1,0,0.1456
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,1024,1,0,0.1874
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,1536,1,0,0.2282
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,2048,1,0,0.2664
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,3072,1,0,0.3461
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,4096,1,0,0.4353
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,6144,1,0,0.6267
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,8192,1,0,0.8234
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,10240,1,0,1.0330
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,12288,1,0,1.2542
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,16384,1,0,1.6966
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,16,1,0,0.1132
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,32,1,0,0.1132
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,64,1,0,0.1175
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,128,1,0,0.1216
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,256,1,0,0.1425
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,32768,1,0,3.6236
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,512,1,0,0.1811
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,1024,1,0,0.2569
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,1536,1,0,0.3322
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,2048,1,0,0.4145
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,3072,1,0,0.5943
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,4096,1,0,0.7785
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,6144,1,0,1.1740
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,8192,1,0,1.5691
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,10240,1,0,1.9764
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,12288,1,0,2.3837
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,16384,1,0,3.2129
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,16,1,0,0.1133
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,32,1,0,0.1156
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,64,1,0,0.1220
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,128,1,0,0.1421
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,256,1,0,0.1787
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,32768,1,0,6.9759
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,512,1,0,0.2501
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,1024,1,0,0.4040
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,1536,1,0,0.5785
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,2048,1,0,0.7583
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,3072,1,0,1.1365
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,4096,1,0,1.5046
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,6144,1,0,2.2590
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,8192,1,0,3.0118
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,16,1,0,0.1152
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,10240,1,0,3.7962
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,32,1,0,0.1213
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,12288,1,0,4.6015
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,64,1,0,0.1423
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,16384,1,0,6.2779
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,128,1,0,0.1755
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,256,1,0,0.2469
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,512,1,0,0.3994
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,1024,1,0,0.7460
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,1536,1,0,1.1126
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,2048,1,0,1.4704
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,3072,1,0,2.1923
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,4096,1,0,2.9104
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,16,1,0,0.1217
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,32,1,0,0.1418
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,6144,1,0,4.3878
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,64,1,0,0.1751
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,8192,1,0,5.9175
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,128,1,0,0.2464
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,256,1,0,0.3951
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,512,1,0,0.7407
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,1024,1,0,1.4499
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,1536,1,0,2.1607
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,2048,1,0,2.8605
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,16,1,0,0.1401
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,32,1,0,0.1770
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,3072,1,0,4.2921
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,64,1,0,0.2448
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,128,1,0,0.3940
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,4096,1,0,5.7424
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,256,1,0,0.7378
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,512,1,0,1.4456
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,1024,1,0,2.8330
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,128,16,1,0,0.1786
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,128,32,1,0,0.2486
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,1536,1,0,4.2277
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,128,64,1,0,0.3968
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,2048,1,0,5.6549
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,128,128,1,0,0.7382
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,128,256,1,0,1.4405
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,256,16,1,0,0.2530
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,128,512,1,0,2.8152
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,256,32,1,0,0.4022
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,128,1024,1,0,5.6107
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,256,64,1,0,0.7446
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,16,1,0,0.1011
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,256,128,1,0,1.4424
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,32,1,0,0.0989
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,64,1,0,0.1076
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,128,1,0,0.1133
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,256,1,0,0.1156
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,256,256,1,0,2.8053
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,256,512,1,0,5.5833
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,512,1,0,0.1252
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,1024,1,0,0.1485
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,1536,1,0,0.1707
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,2048,1,0,0.1950
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,3072,1,0,0.2421
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,4096,1,0,0.2816
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,6144,1,0,0.3689
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,8192,1,0,0.4649
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,10240,1,0,0.5693
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,12288,1,0,0.6672
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,16384,1,0,0.8764
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,32768,1,0,1.7112
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,16,1,0,0.0991
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,32,1,0,0.1029
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,64,1,0,0.1115
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,128,1,0,0.1135
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,256,1,0,0.1237
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,512,1,0,0.1402
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,1024,1,0,0.1829
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,1536,1,0,0.2252
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,2048,1,0,0.2602
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,3072,1,0,0.3365
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,4096,1,0,0.4219
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,6144,1,0,0.6038
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,8192,1,0,0.7903
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,10240,1,0,0.9804
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,12288,1,0,1.1644
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,16,1,0,0.1033
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,16384,1,0,1.5360
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,32,1,0,0.1115
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,64,1,0,0.1133
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,32768,1,0,3.1758
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,128,1,0,0.1217
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,256,1,0,0.1379
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,512,1,0,0.1790
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,1024,1,0,0.2487
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,1536,1,0,0.3193
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,2048,1,0,0.4013
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,3072,1,0,0.5705
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,4096,1,0,0.7487
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,6144,1,0,1.1014
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,8192,1,0,1.4498
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,10240,1,0,1.8162
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,12288,1,0,2.1876
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,16,1,0,0.1086
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,32,1,0,0.1133
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,64,1,0,0.1196
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,16384,1,0,2.9207
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,128,1,0,0.1359
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,256,1,0,0.1750
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,32768,1,0,6.0364
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,512,1,0,0.2443
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,1024,1,0,0.3909
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,1536,1,0,0.5540
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,2048,1,0,0.7251
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,3072,1,0,1.0674
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,4096,1,0,1.4093
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,6144,1,0,2.1128
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,8192,1,0,2.7981
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,10240,1,0,3.4913
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,16,1,0,0.1135
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,12288,1,0,4.1932
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,16384,1,0,5.6241
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,32,1,0,0.1194
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,64,1,0,0.1373
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,128,1,0,0.1738
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,256,1,0,0.2406
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,512,1,0,0.3844
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,1024,1,0,0.7151
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,1536,1,0,1.0488
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,2048,1,0,1.3869
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,3072,1,0,2.0714
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,16,1,0,0.1216
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,32,1,0,0.1351
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,4096,1,0,2.7247
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,6144,1,0,4.0684
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,64,1,0,0.1719
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,128,1,0,0.2398
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,8192,1,0,5.4211
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,256,1,0,0.3833
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,512,1,0,0.7095
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,1024,1,0,1.3761
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,1536,1,0,2.0503
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,2048,1,0,2.6972
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,16,1,0,0.1361
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,32,1,0,0.1726
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,3072,1,0,4.0051
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,128,1,0,0.3812
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,4096,1,0,5.3202
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,64,1,0,0.2385
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,256,1,0,0.7076
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,512,1,0,1.3690
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,1024,1,0,2.6769
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,1536,1,0,3.9724
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,128,16,1,0,0.1719
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,128,32,1,0,0.2393
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,128,64,1,0,0.3779
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,2048,1,0,5.2720
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,128,128,1,0,0.7056
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,128,256,1,0,1.3667
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,128,512,1,0,2.6694
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,256,16,1,0,0.2419
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,256,32,1,0,0.3846
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,256,64,1,0,0.7072
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,256,128,1,0,1.3683
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,128,1024,1,0,5.2393
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,256,256,1,0,2.6651
VLLM,0.17.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,256,512,1,0,5.2219
