framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,128,0.1406
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,256,0.1418
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,512,0.1434
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,1024,0.1475
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,2048,0.1482
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,4096,0.1574
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,8192,0.1586
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,16384,0.1733
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,32768,0.1668
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,65536,0.1710
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,131072,0.1832
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,128,0.1475
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,256,0.1483
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,512,0.1519
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,1024,0.1506
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,2048,0.1567
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,4096,0.1657
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,16384,0.1703
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,8192,0.1768
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,32768,0.1687
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,65536,0.1827
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,131072,0.2118
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,128,0.1513
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,256,0.1544
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,512,0.1518
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,1024,0.1610
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,2048,0.1668
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,4096,0.1764
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,8192,0.1689
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,16384,0.1715
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,32768,0.1856
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,65536,0.2116
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,131072,0.2628
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,128,0.1595
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,256,0.1565
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,512,0.1624
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,1024,0.1716
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,2048,0.1808
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,4096,0.1717
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,8192,0.1744
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,16384,0.1875
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,32768,0.2148
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,65536,0.2644
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,131072,0.3760
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,128,0.1702
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,256,0.1751
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,512,0.1758
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,1024,0.1853
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,2048,0.1801
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,4096,0.1829
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,8192,0.1968
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,16384,0.2262
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,32768,0.2799
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,65536,0.3983
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,131072,0.6449
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,128,0.1787
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,256,0.1798
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,512,0.1892
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,2048,0.1954
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,1024,0.1908
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,4096,0.2131
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,8192,0.2401
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,16384,0.2923
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,32768,0.4107
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,65536,0.6603
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,131072,1.1474
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,128,0.2039
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,256,0.2063
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,512,0.2078
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,1024,0.2150
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,2048,0.2276
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,4096,0.2551
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,8192,0.3106
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,16384,0.4279
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,32768,0.6748
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,65536,1.1752
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,131072,2.1528
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,128,0.2456
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,256,0.2489
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,512,0.2540
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,1024,0.2671
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,2048,0.2972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,4096,0.3557
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,8192,0.4711
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,16384,0.7259
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,32768,1.2247
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,65536,2.2591
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,131072,4.2789
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,128,0.3283
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,512,0.3537
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,256,0.3384
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,2048,0.4420
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,1024,0.3843
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,4096,0.5595
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,8192,0.8170
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,16384,1.3118
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,32768,2.3867
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,65536,4.7241
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,131072,9.6677
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,128,0.5432
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,2048,0.7723
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,512,0.5883
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,256,0.5592
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,1024,0.6468
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,4096,0.9961
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,8192,1.4976
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,16384,2.4744
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,32768,4.4070
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,65536,10.7799
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,128,0.9752
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,256,0.9975
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,512,1.0608
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,2048,1.4405
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,1024,1.1822
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,4096,1.9589
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,8192,2.9634
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,16384,5.0700
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,32768,10.1017
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,128,0.1122
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,256,0.1129
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,512,0.1190
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,1024,0.1233
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,4096,0.1239
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,8192,0.1255
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,2048,0.1161
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,16384,0.1365
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,32768,0.1483
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,65536,0.1437
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,131072,0.1650
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,128,0.1194
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,256,0.1206
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,512,0.1257
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,1024,0.1259
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,2048,0.1241
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,4096,0.1280
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,8192,0.1392
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,16384,0.1465
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,32768,0.1528
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,65536,0.1690
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,131072,0.1945
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,128,0.1221
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,256,0.1232
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,512,0.1239
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,1024,0.1270
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,2048,0.1319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,4096,0.1414
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,8192,0.1483
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,32768,0.1652
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,16384,0.1505
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,65536,0.1911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,131072,0.2410
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,128,0.1278
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,256,0.1247
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,512,0.1282
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,1024,0.1360
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,2048,0.1422
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,4096,0.1501
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,8192,0.1499
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,16384,0.1658
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,32768,0.1962
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,65536,0.2446
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,131072,0.3602
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,128,0.1328
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,256,0.1345
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,1024,0.1474
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,512,0.1380
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,2048,0.1550
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,4096,0.1531
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,8192,0.1695
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,16384,0.1958
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,32768,0.2500
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,65536,0.3630
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,131072,0.5696
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,128,0.1430
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,256,0.1437
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,512,0.1484
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,2048,0.1590
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,1024,0.1578
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,4096,0.1760
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,8192,0.2037
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,16384,0.2542
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,32768,0.3648
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,65536,0.5776
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,131072,1.0138
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,128,0.1506
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,256,0.1527
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,512,0.1652
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,1024,0.1735
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,2048,0.1889
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,4096,0.2141
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,8192,0.2706
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,16384,0.3783
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,32768,0.5914
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,65536,1.0334
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,131072,1.8982
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,128,0.1770
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,256,0.1810
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,512,0.1879
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,1024,0.2032
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,2048,0.2328
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,4096,0.2824
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,8192,0.3930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,16384,0.6178
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,32768,1.0440
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,65536,1.9254
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,131072,3.7534
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,128,0.2269
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,256,0.2336
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,512,0.2496
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,1024,0.2754
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,2048,0.3303
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,4096,0.4414
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,8192,0.6642
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,16384,1.1226
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,32768,2.0170
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,65536,3.8231
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,131072,8.0929
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,128,0.3362
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,256,0.3546
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,512,0.3827
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,1024,0.4390
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,2048,0.5531
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,4096,0.7832
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,8192,1.2371
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,16384,2.1860
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,32768,4.1484
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,65536,8.2207
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,128,0.5747
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,256,0.6007
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,512,0.6548
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,2048,0.9833
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,4096,1.4368
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,1024,0.7650
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,8192,2.2806
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,16384,4.1301
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,32768,8.5667
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,128,0.0977
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,256,0.0928
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,512,0.0995
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,1024,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,2048,0.1052
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,4096,0.1057
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,8192,0.1059
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,16384,0.1158
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,32768,0.1395
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,65536,0.1270
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,131072,0.1604
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,128,0.1004
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,256,0.1025
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,512,0.1025
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,1024,0.1091
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,2048,0.1098
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,4096,0.1124
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,8192,0.1161
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,16384,0.1262
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,32768,0.1298
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,65536,0.1501
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,131072,0.1833
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,256,0.1017
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,128,0.1038
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,512,0.1087
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,1024,0.1122
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,2048,0.1134
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,4096,0.1158
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,8192,0.1258
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,16384,0.1342
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,32768,0.1510
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,65536,0.1901
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,131072,0.2606
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,128,0.1084
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,256,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,512,0.1137
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,2048,0.1185
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,1024,0.1175
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,4096,0.1286
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,8192,0.1389
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,16384,0.1549
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,32768,0.1940
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,65536,0.2646
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,131072,0.4027
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,128,0.1154
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,256,0.1171
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,512,0.1190
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,1024,0.1220
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,2048,0.1318
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,4096,0.1431
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,8192,0.1595
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,16384,0.1994
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,32768,0.2769
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,65536,0.4349
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,131072,0.7450
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,128,0.1239
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,256,0.1240
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,512,0.1256
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,1024,0.1367
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,2048,0.1484
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,4096,0.1656
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,8192,0.2049
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,16384,0.2821
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,32768,0.4414
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,65536,0.7471
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,131072,1.3691
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,128,0.1344
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,256,0.1341
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,512,0.1399
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,1024,0.1521
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,2048,0.1662
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,4096,0.2083
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,8192,0.2859
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,16384,0.4404
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,32768,0.7414
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,65536,1.3518
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,131072,2.6069
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,128,0.1524
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,256,0.1509
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,512,0.1658
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,1024,0.1849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,4096,0.3006
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,2048,0.2226
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,8192,0.4538
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,16384,0.7601
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,32768,1.4133
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,65536,2.7360
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,131072,5.5139
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,256,0.1921
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,128,0.1837
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,1024,0.2508
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,2048,0.3242
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,512,0.2119
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,4096,0.4766
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,8192,0.7839
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,16384,1.4076
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,32768,2.7309
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,65536,5.6958
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,131072,11.7769
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,128,0.2561
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,256,0.2614
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,2048,0.5108
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,512,0.2996
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,1024,0.3715
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,4096,0.7801
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,8192,1.3261
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,16384,2.5143
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,32768,5.4207
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,65536,10.9818
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,128,0.4093
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,256,0.4155
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,512,0.4914
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,1024,0.6355
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,2048,0.9122
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,4096,1.4531
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,8192,2.5659
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,16384,5.3447
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,32768,10.9850
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,128,0.0868
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,256,0.0853
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,512,0.0846
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,2048,0.0891
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,4096,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,1024,0.0917
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,8192,0.0958
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,16384,0.0991
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,32768,0.1132
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,65536,0.1220
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,131072,0.1343
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,128,0.0894
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,256,0.0872
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,512,0.0889
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,2048,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,1024,0.0911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,4096,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,8192,0.1020
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,16384,0.1136
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,32768,0.1288
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,65536,0.1407
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,131072,0.1632
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,128,0.0877
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,256,0.0873
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,512,0.0927
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,1024,0.0939
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,2048,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,4096,0.1018
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,8192,0.1117
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,16384,0.1254
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,32768,0.1347
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,65536,0.1630
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,131072,0.2134
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,128,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,256,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,512,0.0956
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,1024,0.1040
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,4096,0.1175
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,2048,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,8192,0.1267
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,16384,0.1405
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,32768,0.1680
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,65536,0.2202
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,131072,0.3170
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,128,0.0970
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,256,0.0976
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,512,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,1024,0.1119
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,2048,0.1176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,4096,0.1280
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,8192,0.1401
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,16384,0.1673
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,32768,0.2209
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,65536,0.3239
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,131072,0.5220
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,128,0.1031
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,256,0.1057
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,512,0.1126
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,1024,0.1196
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,2048,0.1304
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,8192,0.1711
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,4096,0.1477
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,16384,0.2212
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,32768,0.3275
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,65536,0.5401
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,131072,0.9545
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,128,0.1178
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,256,0.1180
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,1024,0.1388
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,512,0.1254
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,2048,0.1545
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,4096,0.1804
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,8192,0.2307
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,16384,0.3387
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,32768,0.5476
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,65536,0.9631
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,131072,1.7812
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,128,0.1293
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,512,0.1402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,256,0.1322
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,1024,0.1530
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,2048,0.1817
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,4096,0.2310
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,8192,0.3376
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,16384,0.5476
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,32768,0.9595
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,65536,1.7697
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,131072,3.4545
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,128,0.1527
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,256,0.1541
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,512,0.1697
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,1024,0.1995
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,2048,0.2497
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,4096,0.3538
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,8192,0.5642
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,16384,0.9943
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,32768,1.8123
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,65536,3.4864
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,131072,6.9467
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,128,0.1938
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,256,0.2008
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,512,0.2298
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,2048,0.3879
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,1024,0.2841
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,4096,0.5960
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,8192,1.0133
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,16384,1.8868
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,32768,3.5520
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,65536,7.1572
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,128,0.2807
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,256,0.2917
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,512,0.3495
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,2048,0.6404
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,4096,1.0382
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,1024,0.4458
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,16384,3.5304
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,8192,1.8326
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,32768,6.9093
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,128,0.0817
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,256,0.0746
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,512,0.0827
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,1024,0.0831
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,2048,0.0848
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,4096,0.0828
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,8192,0.0829
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,16384,0.0926
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,32768,0.1059
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,65536,0.1144
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,131072,0.1305
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,128,0.0830
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,512,0.0848
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,1024,0.0872
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,256,0.0831
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,2048,0.0872
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,4096,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,8192,0.0941
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,16384,0.0999
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,32768,0.1217
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,65536,0.1361
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,131072,0.1603
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,256,0.0851
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,128,0.0846
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,512,0.0895
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,1024,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,2048,0.0850
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,4096,0.0961
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,8192,0.0982
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,16384,0.1180
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,32768,0.1323
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,65536,0.1565
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,131072,0.2102
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,128,0.0891
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,256,0.0913
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,512,0.0912
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,1024,0.0897
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,2048,0.1001
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,4096,0.1026
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,8192,0.1200
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,16384,0.1332
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,32768,0.1605
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,65536,0.2169
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,131072,0.3111
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,128,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,256,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,512,0.0957
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,1024,0.1031
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,2048,0.1057
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,4096,0.1224
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,8192,0.1369
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,16384,0.1639
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,32768,0.2199
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,65536,0.3211
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,131072,0.5144
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,128,0.1000
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,256,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,512,0.1052
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,1024,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,2048,0.1263
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,4096,0.1402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,8192,0.1657
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,16384,0.2176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,32768,0.3231
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,65536,0.5288
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,131072,0.9417
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,128,0.1058
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,256,0.1108
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,512,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,1024,0.1312
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,2048,0.1450
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,4096,0.1706
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,8192,0.2216
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,16384,0.3310
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,32768,0.5364
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,65536,0.9494
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,131072,1.7641
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,128,0.1169
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,256,0.1180
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,512,0.1300
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,1024,0.1459
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,2048,0.1726
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,4096,0.2244
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,8192,0.3295
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,16384,0.5391
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,32768,0.9446
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,65536,1.7566
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,131072,3.3862
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,128,0.1385
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,256,0.1422
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,512,0.1587
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,1024,0.1821
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,2048,0.2354
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,4096,0.3373
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,8192,0.5455
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,16384,0.9819
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,32768,1.7973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,65536,3.4292
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,131072,6.8658
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,128,0.1668
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,256,0.1752
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,512,0.2054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,2048,0.3558
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,1024,0.2534
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,4096,0.5621
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,8192,0.9837
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,16384,1.8462
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,32768,3.5096
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,65536,6.9343
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,128,0.2266
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,256,0.2409
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,512,0.2928
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,2048,0.5830
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,1024,0.3875
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,4096,0.9803
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,8192,1.7861
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,16384,3.4401
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,32768,6.7749
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,128,0.0770
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,256,0.0706
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,512,0.0749
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,1024,0.0766
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,2048,0.0768
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,4096,0.0839
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,8192,0.0808
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,16384,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,32768,0.0955
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,65536,0.1108
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,131072,0.1234
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,128,0.0789
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,256,0.0822
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,512,0.0830
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,1024,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,2048,0.0832
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,4096,0.0829
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,8192,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,16384,0.0988
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,32768,0.1164
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,65536,0.1343
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,131072,0.1587
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,128,0.0851
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,256,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,512,0.0852
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,1024,0.0870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,2048,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,4096,0.0911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,8192,0.0994
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,16384,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,32768,0.1317
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,65536,0.1545
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,131072,0.2060
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,128,0.0868
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,256,0.0854
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,512,0.0908
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,1024,0.0876
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,2048,0.0950
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,4096,0.0995
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,8192,0.1155
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,16384,0.1312
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,32768,0.1562
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,65536,0.2065
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,131072,0.3108
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,128,0.0924
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,512,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,256,0.0937
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,1024,0.0991
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,2048,0.1065
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,4096,0.1200
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,8192,0.1338
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,16384,0.1605
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,32768,0.2136
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,65536,0.3171
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,131072,0.5176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,128,0.0959
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,512,0.1006
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,256,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,1024,0.1067
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,2048,0.1195
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,4096,0.1380
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,8192,0.1636
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,16384,0.2156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,32768,0.3176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,131072,0.9423
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,65536,0.5314
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,128,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,256,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,512,0.1117
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,2048,0.1412
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,4096,0.1672
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,1024,0.1266
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,8192,0.2189
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,16384,0.3241
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,32768,0.5384
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,65536,0.9464
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,131072,1.7718
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,128,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,256,0.1137
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,512,0.1253
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,1024,0.1402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,2048,0.1701
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,4096,0.2202
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,16384,0.5361
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,8192,0.3243
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,32768,0.9359
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,65536,1.7500
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,131072,3.3835
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,128,0.1302
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,256,0.1336
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,512,0.1505
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,2048,0.2289
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,1024,0.1790
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,4096,0.3330
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,8192,0.5381
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,16384,0.9659
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,32768,1.7858
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,65536,3.4195
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,131072,6.8377
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,128,0.1569
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,256,0.1627
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,512,0.1920
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,1024,0.2449
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,2048,0.3459
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,4096,0.5486
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,16384,1.8329
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,8192,0.9768
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,32768,3.4880
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,65536,6.9894
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,128,0.2036
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,256,0.2176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,512,0.2686
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,2048,0.5574
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,1024,0.3618
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,4096,0.9529
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,8192,1.7476
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,16384,3.4343
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,32768,6.7034
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,128,0.0766
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,256,0.0766
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,512,0.0727
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,1024,0.0753
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,2048,0.0746
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,4096,0.0728
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,8192,0.0819
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,16384,0.0870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,32768,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,65536,0.1125
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,131072,0.1220
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,128,0.0788
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,256,0.0772
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,512,0.0804
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,1024,0.0809
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,2048,0.0786
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,4096,0.0787
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,8192,0.0869
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,16384,0.0919
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,32768,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,65536,0.1307
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,131072,0.1565
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,128,0.0828
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,256,0.0809
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,512,0.0829
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,1024,0.0811
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,2048,0.0829
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,8192,0.0933
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,4096,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,16384,0.1088
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,32768,0.1295
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,65536,0.1519
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,131072,0.2059
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,128,0.0861
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,256,0.0869
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,512,0.0870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,1024,0.0871
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,2048,0.0913
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,4096,0.0993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,8192,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,16384,0.1280
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,32768,0.1568
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,65536,0.2111
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,131072,0.3126
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,128,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,256,0.0891
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,512,0.0912
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,1024,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,2048,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,4096,0.1168
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,8192,0.1341
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,16384,0.1582
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,32768,0.2119
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,65536,0.3125
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,131072,0.5084
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,128,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,256,0.0974
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,1024,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,512,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,2048,0.1170
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,4096,0.1346
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,8192,0.1574
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,16384,0.2109
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,32768,0.3226
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,65536,0.5263
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,131072,0.9354
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,128,0.1003
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,256,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,512,0.1093
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,1024,0.1258
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,2048,0.1397
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,4096,0.1662
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,8192,0.2192
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,16384,0.3243
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,32768,0.5334
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,65536,0.9416
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,131072,1.7647
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,128,0.1085
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,256,0.1093
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,512,0.1217
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,1024,0.1401
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,2048,0.1667
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,4096,0.2198
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,8192,0.3255
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,16384,0.5324
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,32768,0.9324
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,65536,1.7545
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,131072,3.4121
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,128,0.1230
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,256,0.1295
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,512,0.1468
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,1024,0.1733
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,2048,0.2243
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,4096,0.3287
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,8192,0.5394
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,16384,0.9628
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,32768,1.7809
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,65536,3.4094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,131072,6.8127
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,128,0.1511
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,256,0.1570
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,512,0.1867
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,2048,0.3376
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,1024,0.2364
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,4096,0.5455
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,8192,0.9722
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,16384,1.8280
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,32768,3.4591
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,65536,6.9537
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,128,0.1932
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,256,0.2064
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,512,0.2562
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,2048,0.5458
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,4096,0.9376
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,1024,0.3513
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,8192,1.7406
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,16384,3.3583
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,32768,6.6665
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,128,0.0746
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,256,0.0728
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,512,0.0709
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,1024,0.0696
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,2048,0.0727
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,4096,0.0748
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,8192,0.0791
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,16384,0.0834
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,32768,0.0920
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,65536,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,131072,0.1203
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,128,0.0770
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,256,0.0769
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,512,0.0753
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,1024,0.0787
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,2048,0.0767
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,4096,0.0788
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,16384,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,8192,0.0828
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,32768,0.1087
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,65536,0.1296
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,131072,0.1518
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,128,0.0808
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,256,0.0759
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,512,0.0809
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,1024,0.0789
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,2048,0.0830
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,4096,0.0829
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,8192,0.0880
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,16384,0.1031
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,32768,0.1263
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,65536,0.1500
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,131072,0.2026
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,128,0.0808
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,256,0.0832
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,512,0.0869
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,1024,0.0851
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,2048,0.0911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,4096,0.0924
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,8192,0.1083
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,16384,0.1253
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,32768,0.1514
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,65536,0.2094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,131072,0.3070
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,128,0.0870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,256,0.0912
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,512,0.0922
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,1024,0.0912
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,2048,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,4096,0.1121
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,8192,0.1280
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,16384,0.1560
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,32768,0.2142
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,65536,0.3149
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,131072,0.5086
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,128,0.0932
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,256,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,1024,0.0991
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,2048,0.1123
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,4096,0.1325
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,512,0.0962
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,8192,0.1554
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,16384,0.2099
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,32768,0.3159
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,131072,0.9325
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,65536,0.5277
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,128,0.0995
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,256,0.0991
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,512,0.1052
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,2048,0.1333
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,1024,0.1197
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,4096,0.1586
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,8192,0.2141
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,16384,0.3208
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,32768,0.5317
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,65536,0.9394
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,131072,1.7637
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,128,0.1055
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,256,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,512,0.1171
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,1024,0.1364
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,2048,0.1630
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,4096,0.2143
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,8192,0.3200
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,16384,0.5344
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,32768,0.9348
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,65536,1.7440
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,131072,3.3767
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,256,0.1282
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,128,0.1179
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,512,0.1423
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,2048,0.2221
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,4096,0.3235
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,1024,0.1686
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,8192,0.5330
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,16384,0.9533
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,32768,1.7755
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,65536,3.4121
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,131072,6.8901
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,128,0.1457
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,256,0.1523
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,512,0.1794
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,1024,0.2299
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,2048,0.3335
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,4096,0.5397
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,8192,0.9618
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,16384,1.8259
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,32768,3.4572
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,65536,6.9213
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,128,0.1849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,256,0.1996
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,512,0.2502
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,2048,0.5352
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,1024,0.3432
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,16384,3.3460
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,8192,1.7316
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,4096,0.9314
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,32768,6.6210
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,512,0.1392
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,128,0.1387
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,1024,0.1437
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,2048,0.1402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,4096,0.1463
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,256,0.1421
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,8192,0.1514
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,32768,0.1608
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,16384,0.1564
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,65536,0.1653
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,131072,0.1704
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,128,0.1476
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,256,0.1443
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,512,0.1461
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,1024,0.1474
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,2048,0.1511
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,4096,0.1561
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,32768,0.1652
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,16384,0.1648
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,8192,0.1568
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,65536,0.1747
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,131072,0.1852
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,128,0.1485
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,256,0.1488
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,512,0.1507
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,1024,0.1546
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,2048,0.1568
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,4096,0.1606
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,8192,0.1634
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,16384,0.1673
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,32768,0.1750
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,65536,0.1884
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,131072,0.2138
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,128,0.1532
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,256,0.1544
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,512,0.1572
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,1024,0.1620
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,2048,0.1658
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,4096,0.1661
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,8192,0.1689
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,16384,0.1770
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,32768,0.1898
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,65536,0.2191
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,131072,0.2758
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,128,0.1660
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,256,0.1685
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,512,0.1684
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,1024,0.1718
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,2048,0.1737
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,4096,0.1777
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,8192,0.1868
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,16384,0.2019
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,32768,0.2319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,65536,0.2961
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,131072,0.4201
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,128,0.1802
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,256,0.1811
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,512,0.1896
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,1024,0.1889
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,2048,0.1935
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,4096,0.2025
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,8192,0.2158
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,16384,0.2464
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,32768,0.3097
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,65536,0.4349
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,131072,0.7073
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,256,0.2068
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,512,0.2084
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,128,0.2064
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,1024,0.2121
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,2048,0.2199
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,4096,0.2334
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,8192,0.2624
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,16384,0.3253
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,32768,0.4490
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,65536,0.7273
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,131072,1.2842
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,128,0.2551
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,256,0.2569
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,512,0.2609
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,1024,0.2689
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,2048,0.2811
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,4096,0.3119
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,8192,0.3749
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,16384,0.5049
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,32768,0.7876
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,65536,1.3790
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,131072,2.5234
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,128,0.3554
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,512,0.3662
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,1024,0.3803
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,2048,0.4140
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,256,0.3575
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,4096,0.4769
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,8192,0.6094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,16384,0.9078
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,32768,1.5031
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,65536,2.7539
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,131072,5.0844
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,128,0.5930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,256,0.5958
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,512,0.6149
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,1024,0.6430
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,2048,0.7074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,4096,0.8342
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,8192,1.1120
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,16384,1.6841
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,32768,2.8207
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,65536,4.8580
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,128,1.0872
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,256,1.0984
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,512,1.1289
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,2048,1.3247
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,1024,1.1941
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,4096,1.5976
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,16384,3.4126
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,8192,2.1673
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,32768,5.9696
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,128,0.1131
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,256,0.1136
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,512,0.1096
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,1024,0.1145
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,4096,0.1139
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,8192,0.1319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,2048,0.1313
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,32768,0.1324
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,16384,0.1402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,131072,0.1402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,65536,0.1361
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,128,0.1176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,256,0.1192
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,512,0.1202
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,1024,0.1175
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,2048,0.1242
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,4096,0.1219
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,8192,0.1300
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,16384,0.1336
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,32768,0.1433
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,65536,0.1519
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,131072,0.1586
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,128,0.1197
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,256,0.1211
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,512,0.1195
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,1024,0.1258
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,2048,0.1258
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,4096,0.1259
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,8192,0.1341
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,16384,0.1393
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,32768,0.1485
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,65536,0.1580
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,131072,0.1799
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,128,0.1247
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,256,0.1233
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,512,0.1265
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,1024,0.1279
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,2048,0.1319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,4096,0.1353
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,8192,0.1412
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,16384,0.1484
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,32768,0.1585
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,65536,0.1830
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,131072,0.2292
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,128,0.1319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,256,0.1311
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,512,0.1340
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,1024,0.1368
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,2048,0.1413
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,4096,0.1448
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,8192,0.1504
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,16384,0.1627
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,32768,0.1867
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,65536,0.2338
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,131072,0.3292
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,128,0.1388
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,256,0.1409
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,512,0.1419
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,1024,0.1472
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,2048,0.1525
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,4096,0.1582
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,8192,0.1708
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,16384,0.1954
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,32768,0.2455
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,65536,0.3470
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,131072,0.5558
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,128,0.1534
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,256,0.1546
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,512,0.1610
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,1024,0.1661
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,2048,0.1733
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,4096,0.1861
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,8192,0.2099
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,16384,0.2617
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,32768,0.3614
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,65536,0.5713
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,131072,0.9984
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,128,0.1808
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,256,0.1811
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,512,0.1841
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,1024,0.1935
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,2048,0.2053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,4096,0.2310
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,8192,0.2801
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,16384,0.3799
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,32768,0.5951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,65536,1.0240
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,131072,1.9529
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,128,0.2328
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,256,0.2362
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,512,0.2456
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,2048,0.2876
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,1024,0.2598
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,4096,0.3355
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,8192,0.4411
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,16384,0.6555
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,32768,1.0941
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,65536,2.0560
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,131072,3.8816
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,256,0.3660
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,128,0.3566
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,512,0.3775
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,2048,0.4598
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,1024,0.4039
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,4096,0.5657
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,8192,0.7878
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,16384,1.2353
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,32768,2.1584
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,65536,4.2696
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,128,0.6102
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,256,0.6253
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,512,0.6502
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,2048,0.8151
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,1024,0.7017
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,4096,1.0187
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,8192,1.4616
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,16384,2.2848
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,32768,3.9397
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,128,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,256,0.1026
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,1024,0.0962
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,512,0.0978
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,2048,0.0978
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,4096,0.1116
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,8192,0.1032
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,16384,0.1008
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,32768,0.1078
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,65536,0.1109
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,131072,0.1244
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,128,0.0982
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,256,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,512,0.1035
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,1024,0.1043
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,2048,0.1040
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,4096,0.1083
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,8192,0.1061
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,16384,0.1136
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,32768,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,65536,0.1260
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,131072,0.1440
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,128,0.1002
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,256,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,512,0.1058
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,1024,0.1067
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,2048,0.1093
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,4096,0.1088
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,8192,0.1148
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,32768,0.1260
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,16384,0.1185
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,65536,0.1474
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,131072,0.1773
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,128,0.1082
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,256,0.1078
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,512,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,1024,0.1116
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,2048,0.1111
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,4096,0.1172
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,8192,0.1220
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,16384,0.1300
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,32768,0.1522
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,65536,0.1815
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,131072,0.2446
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,128,0.1118
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,256,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,512,0.1189
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,1024,0.1172
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,2048,0.1200
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,4096,0.1257
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,8192,0.1341
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,16384,0.1560
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,32768,0.1893
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,65536,0.2607
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,131072,0.4027
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,128,0.1195
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,1024,0.1287
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,256,0.1179
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,512,0.1214
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,2048,0.1317
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,4096,0.1388
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,8192,0.1619
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,16384,0.1952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,32768,0.2671
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,65536,0.4031
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,131072,0.6922
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,128,0.1307
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,512,0.1305
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,256,0.1272
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,2048,0.1463
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,1024,0.1394
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,8192,0.1992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,4096,0.1616
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,16384,0.2706
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,32768,0.4102
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,65536,0.6881
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,131072,1.2783
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,128,0.1479
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,256,0.1463
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,512,0.1504
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,2048,0.1790
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,1024,0.1609
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,4096,0.2135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,8192,0.2867
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,16384,0.4266
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,32768,0.7123
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,65536,1.2963
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,131072,2.5367
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,128,0.1797
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,256,0.1804
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,1024,0.2079
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,2048,0.2428
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,512,0.1881
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,4096,0.3164
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,8192,0.4579
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,16384,0.7439
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,32768,1.3199
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,65536,2.5319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,131072,5.4031
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,128,0.2526
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,256,0.2528
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,512,0.2694
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,1024,0.3024
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,4096,0.4983
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,2048,0.3683
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,8192,0.7558
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,16384,1.2727
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,32768,2.3335
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,65536,4.9309
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,128,0.4068
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,256,0.4071
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,512,0.4416
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,2048,0.6380
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,1024,0.5060
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,16384,2.4668
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,4096,0.8983
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,8192,1.4170
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,32768,4.9068
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,128,0.0836
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,256,0.0851
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,512,0.0837
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,1024,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,2048,0.0889
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,4096,0.0885
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,8192,0.0877
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,16384,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,32768,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,65536,0.1096
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,131072,0.1157
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,128,0.0913
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,256,0.0872
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,512,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,1024,0.0879
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,2048,0.0929
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,4096,0.0870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,16384,0.1037
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,8192,0.0975
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,32768,0.1158
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,65536,0.1221
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,131072,0.1326
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,128,0.0893
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,256,0.0897
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,512,0.0892
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,1024,0.0900
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,4096,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,8192,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,2048,0.0894
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,16384,0.1114
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,32768,0.1187
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,65536,0.1300
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,131072,0.1510
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,128,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,256,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,512,0.0991
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,1024,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,2048,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,4096,0.1030
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,8192,0.1106
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,16384,0.1195
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,32768,0.1320
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,65536,0.1568
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,131072,0.1997
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,256,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,128,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,512,0.0982
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,1024,0.1016
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,2048,0.1047
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,4096,0.1142
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,8192,0.1255
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,16384,0.1369
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,32768,0.1555
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,65536,0.1995
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,131072,0.2847
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,128,0.1031
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,256,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,512,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,1024,0.1096
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,2048,0.1184
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,4096,0.1244
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,8192,0.1364
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,16384,0.1616
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,32768,0.2038
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,65536,0.2929
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,131072,0.4746
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,512,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,2048,0.1298
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,128,0.1116
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,256,0.1124
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,1024,0.1258
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,4096,0.1443
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,8192,0.1671
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,16384,0.2117
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,65536,0.4815
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,32768,0.3020
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,131072,0.8395
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,128,0.1232
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,512,0.1308
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,2048,0.1489
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,1024,0.1367
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,256,0.1237
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,4096,0.1724
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,8192,0.2168
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,32768,0.4885
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,16384,0.3058
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,65536,0.8480
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,131072,1.5805
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,128,0.1463
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,512,0.1547
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,256,0.1451
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,2048,0.1914
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,1024,0.1639
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,4096,0.2364
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,8192,0.3271
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,16384,0.5105
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,32768,0.8714
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,65536,1.5949
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,131072,3.0352
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,128,0.1851
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,256,0.1890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,512,0.1991
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,2048,0.2702
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,1024,0.2244
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,4096,0.3645
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,8192,0.5530
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,16384,0.9242
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,32768,1.6809
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,65536,3.2053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,128,0.2693
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,256,0.2745
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,2048,0.4302
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,512,0.2954
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,1024,0.3407
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,4096,0.6102
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,8192,0.9604
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,16384,1.6664
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,32768,3.1108
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,128,0.0745
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,512,0.0788
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,1024,0.0768
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,2048,0.0806
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,4096,0.0807
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,256,0.0792
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,8192,0.0831
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,16384,0.0848
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,32768,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,65536,0.1023
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,131072,0.1078
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,128,0.0839
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,256,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,512,0.0826
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,1024,0.0866
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,2048,0.0870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,4096,0.0870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,8192,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,16384,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,32768,0.1015
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,65536,0.1190
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,131072,0.1252
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,128,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,2048,0.0869
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,512,0.0889
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,4096,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,1024,0.0829
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,256,0.0836
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,8192,0.0891
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,16384,0.1005
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,32768,0.1137
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,131072,0.1443
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,65536,0.1230
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,128,0.0872
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,256,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,512,0.0929
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,1024,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,2048,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,4096,0.0932
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,8192,0.1036
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,16384,0.1150
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,32768,0.1260
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,65536,0.1472
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,131072,0.1930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,128,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,256,0.0970
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,512,0.0981
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,1024,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,2048,0.1014
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,4096,0.1034
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,8192,0.1178
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,16384,0.1303
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,32768,0.1520
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,65536,0.1987
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,131072,0.2824
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,128,0.1003
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,256,0.0980
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,512,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,1024,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,2048,0.1035
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,4096,0.1181
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,8192,0.1306
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,16384,0.1545
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,32768,0.1996
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,65536,0.2854
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,131072,0.4691
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,256,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,1024,0.1122
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,2048,0.1232
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,512,0.1069
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,4096,0.1339
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,128,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,8192,0.1583
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,16384,0.2023
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,65536,0.4728
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,32768,0.2937
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,131072,0.8325
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,128,0.1111
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,256,0.1114
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,2048,0.1382
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,512,0.1155
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,4096,0.1615
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,1024,0.1258
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,8192,0.2081
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,16384,0.2968
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,32768,0.4774
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,65536,0.8366
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,131072,1.5453
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,128,0.1280
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,256,0.1306
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,2048,0.1736
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,512,0.1398
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,1024,0.1490
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,4096,0.2218
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,8192,0.3113
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,16384,0.4935
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,32768,0.8535
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,65536,1.5714
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,131072,3.0367
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,128,0.1560
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,256,0.1576
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,512,0.1691
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,1024,0.1937
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,2048,0.2412
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,4096,0.3333
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,8192,0.5192
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,16384,0.8861
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,32768,1.6195
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,65536,3.1022
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,128,0.2088
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,256,0.2130
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,2048,0.3696
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,512,0.2366
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,1024,0.2815
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,4096,0.5472
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,8192,0.8918
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,16384,1.5772
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,32768,2.9779
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,256,0.0748
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,128,0.0788
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,512,0.0770
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,1024,0.0746
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,2048,0.0828
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,4096,0.0768
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,8192,0.0806
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,16384,0.0856
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,32768,0.0866
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,131072,0.1098
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,65536,0.0912
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,128,0.0808
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,256,0.0811
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,512,0.0830
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,1024,0.0848
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,2048,0.0848
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,4096,0.0807
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,8192,0.0868
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,16384,0.0869
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,32768,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,65536,0.1141
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,131072,0.1254
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,128,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,256,0.0848
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,512,0.0871
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,1024,0.0869
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,2048,0.0829
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,4096,0.0869
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,8192,0.0881
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,16384,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,32768,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,65536,0.1239
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,131072,0.1454
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,128,0.0870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,256,0.0889
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,512,0.0891
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,1024,0.0870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,2048,0.0909
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,4096,0.0904
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,8192,0.0954
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,16384,0.1127
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,32768,0.1240
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,65536,0.1492
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,131072,0.1891
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,256,0.0932
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,128,0.0953
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,1024,0.0937
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,2048,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,4096,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,512,0.0933
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,8192,0.1176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,16384,0.1273
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,32768,0.1486
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,65536,0.1936
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,131072,0.2816
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,128,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,256,0.0950
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,1024,0.0957
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,2048,0.1028
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,4096,0.1157
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,512,0.0993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,8192,0.1281
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,16384,0.1523
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,32768,0.1932
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,65536,0.2870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,131072,0.4626
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,128,0.1014
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,256,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,512,0.1041
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,1024,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,2048,0.1179
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,4096,0.1325
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,8192,0.1551
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,16384,0.1994
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,32768,0.2915
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,65536,0.4708
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,131072,0.8272
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,128,0.1073
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,256,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,512,0.1097
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,1024,0.1219
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,2048,0.1349
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,4096,0.1566
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,8192,0.2035
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,16384,0.2910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,65536,0.8286
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,32768,0.4703
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,131072,1.5360
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,128,0.1210
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,256,0.1232
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,512,0.1320
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,2048,0.1669
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,1024,0.1403
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,4096,0.2161
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,8192,0.3027
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,16384,0.4855
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,32768,0.8438
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,65536,1.5610
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,131072,3.0070
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,128,0.1423
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,256,0.1451
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,512,0.1567
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,1024,0.1805
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,2048,0.2283
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,4096,0.3190
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,8192,0.5055
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,16384,0.8697
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,32768,1.5992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,65536,3.1186
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,128,0.1836
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,256,0.1870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,512,0.2095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,2048,0.3421
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,1024,0.2533
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,4096,0.5166
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,8192,0.8612
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,32768,2.9127
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,16384,1.5408
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,128,0.0748
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,256,0.0730
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,512,0.0749
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,1024,0.0727
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,2048,0.0726
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,4096,0.0784
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,8192,0.0722
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,16384,0.0788
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,32768,0.0809
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,65536,0.0889
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,131072,0.1041
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,128,0.0769
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,256,0.0772
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,512,0.0786
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,2048,0.0802
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,1024,0.0809
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,4096,0.0768
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,8192,0.0809
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,16384,0.0852
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,32768,0.0933
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,65536,0.1092
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,131072,0.1220
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,128,0.0826
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,256,0.0818
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,1024,0.0860
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,2048,0.0811
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,512,0.0825
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,4096,0.0847
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,8192,0.0870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,16384,0.0924
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,32768,0.1071
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,65536,0.1218
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,131072,0.1442
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,128,0.0848
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,512,0.0846
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,256,0.0868
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,1024,0.0889
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,2048,0.0876
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,4096,0.0900
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,8192,0.0946
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,16384,0.1067
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,32768,0.1216
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,65536,0.1425
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,131072,0.1886
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,128,0.0893
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,256,0.0892
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,512,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,1024,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,2048,0.0934
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,4096,0.0996
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,8192,0.1113
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,16384,0.1241
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,32768,0.1463
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,131072,0.2776
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,65536,0.1893
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,128,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,256,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,512,0.0941
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,2048,0.0959
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,1024,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,4096,0.1120
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,8192,0.1260
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,16384,0.1485
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,32768,0.1973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,65536,0.2858
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,131072,0.4629
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,128,0.0997
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,256,0.0993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,512,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,1024,0.1064
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,2048,0.1182
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,4096,0.1306
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,8192,0.1546
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,16384,0.1998
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,32768,0.2876
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,65536,0.4670
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,131072,0.8225
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,128,0.1034
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,256,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,512,0.1068
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,1024,0.1197
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,2048,0.1322
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,4096,0.1542
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,8192,0.2007
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,16384,0.2890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,32768,0.4675
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,65536,0.8252
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,131072,1.5363
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,128,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,256,0.1169
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,512,0.1259
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,2048,0.1608
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,1024,0.1380
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,4096,0.2099
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,8192,0.3001
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,16384,0.4798
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,32768,0.8372
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,65536,1.5591
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,131072,3.0144
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,128,0.1340
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,256,0.1385
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,512,0.1501
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,1024,0.1741
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,2048,0.2208
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,4096,0.3122
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,16384,0.8605
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,8192,0.4962
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,32768,1.5905
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,65536,3.0560
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,128,0.1710
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,256,0.1753
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,512,0.1979
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,1024,0.2417
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,2048,0.3288
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,4096,0.5028
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,8192,0.8431
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,16384,1.5252
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,32768,2.8981
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,128,0.0819
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,256,0.0725
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,512,0.0774
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,1024,0.0793
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,2048,0.0788
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,4096,0.0776
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,8192,0.0809
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,16384,0.0894
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,32768,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,65536,0.0932
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,131072,0.1102
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,128,0.0795
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,256,0.0830
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,1024,0.0835
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,2048,0.0848
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,512,0.0806
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,4096,0.0869
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,8192,0.0892
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,16384,0.0920
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,32768,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,65536,0.1106
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,131072,0.1287
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,128,0.0828
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,256,0.0865
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,512,0.0869
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,1024,0.0840
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,2048,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,4096,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,8192,0.0911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,16384,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,32768,0.1092
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,131072,0.1466
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,65536,0.1249
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,256,0.0892
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,512,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,1024,0.0934
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,2048,0.0965
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,128,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,4096,0.0958
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,8192,0.0993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,16384,0.1101
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,32768,0.1257
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,65536,0.1503
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,131072,0.1937
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,128,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,256,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,512,0.0975
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,1024,0.1018
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,2048,0.0996
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,4096,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,8192,0.1190
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,16384,0.1290
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,65536,0.1958
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,32768,0.1523
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,131072,0.2801
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,128,0.0999
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,256,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,1024,0.1036
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,512,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,2048,0.1071
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,4096,0.1176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,8192,0.1304
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,16384,0.1556
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,32768,0.1996
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,65536,0.2888
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,131072,0.4690
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,128,0.1036
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,256,0.1070
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,512,0.1071
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,1024,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,2048,0.1213
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,4096,0.1339
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,8192,0.1592
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,16384,0.2017
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,32768,0.2934
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,65536,0.4730
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,131072,0.8285
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,128,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,256,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,512,0.1121
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,1024,0.1259
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,2048,0.1382
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,4096,0.1589
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,8192,0.2047
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,16384,0.2956
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,32768,0.4770
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,65536,0.8308
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,131072,1.5433
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,128,0.1205
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,256,0.1241
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,512,0.1323
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,1024,0.1444
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,2048,0.1671
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,4096,0.2174
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,16384,0.4830
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,32768,0.8436
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,8192,0.3060
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,65536,1.5623
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,131072,2.9922
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,128,0.1384
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,256,0.1417
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,512,0.1552
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,1024,0.1781
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,2048,0.2245
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,4096,0.3161
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,8192,0.4980
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,16384,0.8608
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,32768,1.6011
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,65536,3.0832
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,128,0.1732
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,256,0.1787
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,512,0.2003
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,1024,0.2421
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,2048,0.3310
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,4096,0.5020
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,8192,0.8415
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,16384,1.5253
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,32768,2.9257
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,512,0.1361
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,128,0.1382
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,1024,0.1402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,256,0.1381
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,2048,0.1476
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,4096,0.1496
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,8192,0.1544
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,16384,0.1687
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,32768,0.1559
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,65536,0.1657
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,131072,0.1742
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,128,0.1427
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,256,0.1380
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,512,0.1461
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,1024,0.1505
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,2048,0.1507
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,4096,0.1587
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,8192,0.1674
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,16384,0.1561
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,32768,0.1654
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,65536,0.1730
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,131072,0.2022
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,128,0.1414
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,256,0.1464
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,512,0.1503
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,1024,0.1545
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,2048,0.1608
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,4096,0.1695
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,8192,0.1557
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,32768,0.1748
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,65536,0.2050
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,16384,0.1650
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,131072,0.2555
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,128,0.1501
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,256,0.1505
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,512,0.1534
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,1024,0.1622
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,2048,0.1712
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,4096,0.1567
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,8192,0.1625
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,16384,0.1802
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,32768,0.2041
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,131072,0.3619
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,65536,0.2547
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,128,0.1608
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,512,0.1668
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,1024,0.1756
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,256,0.1613
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,2048,0.1659
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,4096,0.1747
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,8192,0.1880
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,16384,0.2115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,32768,0.2694
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,65536,0.3782
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,131072,0.6080
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,256,0.1678
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,128,0.1666
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,1024,0.1776
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,512,0.1756
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,2048,0.1856
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,4096,0.1977
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,8192,0.2234
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,16384,0.2804
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,32768,0.3947
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,65536,0.6171
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,131072,1.0762
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,128,0.1862
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,256,0.1883
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,512,0.1924
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,1024,0.1975
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,2048,0.2132
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,4096,0.2366
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,8192,0.2927
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,16384,0.4065
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,32768,0.6288
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,65536,1.0917
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,131072,2.0504
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,128,0.2364
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,256,0.2380
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,512,0.2476
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,1024,0.2641
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,2048,0.2868
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,4096,0.3446
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,8192,0.4547
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,16384,0.6930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,32768,1.1545
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,65536,2.0757
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,131072,3.9232
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,128,0.3308
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,256,0.3413
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,1024,0.3832
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,512,0.3572
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,2048,0.4439
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,4096,0.5537
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,8192,0.7930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,16384,1.2592
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,32768,2.2106
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,65536,4.2211
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,131072,8.4678
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,128,0.5160
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,256,0.5330
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,512,0.5613
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,1024,0.6153
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,2048,0.7235
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,4096,0.9354
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,8192,1.3654
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,16384,2.2563
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,32768,4.0934
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,65536,7.9914
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,128,0.9138
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,256,0.9388
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,2048,1.3223
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,512,0.9911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,1024,1.1047
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,4096,1.7689
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,8192,2.6668
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,16384,4.5760
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,32768,8.9820
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,128,0.1195
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,256,0.1295
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,512,0.1219
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,1024,0.1236
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,2048,0.1245
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,4096,0.1272
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,8192,0.1299
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,16384,0.1470
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,32768,0.1523
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,65536,0.1479
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,131072,0.1647
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,128,0.1227
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,256,0.1219
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,512,0.1271
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,1024,0.1299
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,8192,0.1345
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,2048,0.1248
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,4096,0.1340
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,32768,0.1558
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,65536,0.1667
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,131072,0.1965
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,16384,0.1473
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,128,0.1259
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,256,0.1228
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,512,0.1278
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,2048,0.1327
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,1024,0.1272
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,4096,0.1382
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,8192,0.1500
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,32768,0.1623
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,65536,0.1917
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,16384,0.1497
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,131072,0.2451
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,128,0.1285
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,256,0.1281
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,512,0.1312
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,2048,0.1413
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,4096,0.1488
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,1024,0.1364
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,8192,0.1526
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,16384,0.1668
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,32768,0.1950
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,65536,0.2448
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,131072,0.3541
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,128,0.1365
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,256,0.1360
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,512,0.1381
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,1024,0.1487
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,2048,0.1555
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,4096,0.1558
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,8192,0.1695
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,16384,0.1957
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,32768,0.2494
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,65536,0.3585
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,131072,0.5716
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,128,0.1458
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,512,0.1495
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,256,0.1446
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,1024,0.1568
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,2048,0.1584
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,4096,0.1757
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,8192,0.2015
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,16384,0.2579
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,32768,0.3611
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,65536,0.5758
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,131072,1.0062
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,128,0.1498
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,256,0.1524
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,1024,0.1708
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,2048,0.1868
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,512,0.1620
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,4096,0.2129
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,8192,0.2681
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,16384,0.3725
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,32768,0.5868
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,65536,1.0196
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,131072,1.8601
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,128,0.1750
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,256,0.1806
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,512,0.1883
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,1024,0.2053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,4096,0.2843
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,2048,0.2291
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,8192,0.3930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,16384,0.6011
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,32768,1.0359
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,65536,1.8689
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,131072,3.6163
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,256,0.2384
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,128,0.2320
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,512,0.2532
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,1024,0.2857
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,2048,0.3387
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,4096,0.4437
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,8192,0.6619
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,16384,1.1098
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,32768,1.9703
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,65536,3.6919
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,131072,7.6865
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,128,0.3328
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,256,0.3469
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,512,0.3788
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,2048,0.5410
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,1024,0.4409
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,4096,0.7647
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,8192,1.2101
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,16384,2.1255
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,32768,3.9982
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,65536,7.9723
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,128,0.5521
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,256,0.5776
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,512,0.6294
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,1024,0.7325
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,2048,0.9339
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,4096,1.3647
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,8192,2.1997
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,16384,4.0172
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,32768,7.6380
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,128,0.1036
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,256,0.1049
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,512,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,1024,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,2048,0.1108
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,4096,0.1086
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,8192,0.1108
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,16384,0.1214
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,32768,0.1319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,65536,0.1419
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,131072,0.1687
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,128,0.1076
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,256,0.1063
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,512,0.1101
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,2048,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,4096,0.1138
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,1024,0.1129
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,8192,0.1180
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,16384,0.1339
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,32768,0.1420
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,65536,0.1609
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,131072,0.1975
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,128,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,256,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,512,0.1134
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,1024,0.1176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,2048,0.1152
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,4096,0.1221
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,8192,0.1345
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,16384,0.1413
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,32768,0.1616
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,65536,0.2014
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,131072,0.2733
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,128,0.1144
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,256,0.1136
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,512,0.1179
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,2048,0.1244
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,1024,0.1197
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,4096,0.1362
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,8192,0.1443
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,16384,0.1666
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,32768,0.2029
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,65536,0.2733
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,131072,0.4122
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,128,0.1200
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,256,0.1197
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,512,0.1248
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,1024,0.1293
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,2048,0.1401
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,4096,0.1503
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,8192,0.1671
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,16384,0.2082
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,32768,0.2843
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,65536,0.4422
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,131072,0.7512
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,128,0.1247
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,512,0.1316
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,256,0.1291
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,1024,0.1441
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,2048,0.1534
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,8192,0.2112
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,4096,0.1752
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,16384,0.2882
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,32768,0.4440
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,65536,0.7524
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,131072,1.3776
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,256,0.1397
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,512,0.1463
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,128,0.1375
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,2048,0.1752
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,1024,0.1548
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,4096,0.2123
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,8192,0.2900
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,16384,0.4413
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,32768,0.7496
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,65536,1.3601
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,131072,2.5893
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,128,0.1570
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,256,0.1588
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,2048,0.2286
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,512,0.1697
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,4096,0.3049
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,1024,0.1888
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,8192,0.4587
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,16384,0.7664
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,32768,1.4159
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,65536,2.6916
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,131072,5.2393
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,128,0.1945
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,512,0.2184
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,256,0.1954
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,2048,0.3349
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,1024,0.2563
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,4096,0.4841
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,8192,0.7912
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,16384,1.4206
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,32768,2.7368
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,65536,5.4658
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,131072,11.3154
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,128,0.2590
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,256,0.2659
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,512,0.3044
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,1024,0.3743
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,2048,0.5176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,4096,0.7875
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,8192,1.3318
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,16384,2.5408
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,32768,5.2019
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,65536,10.5396
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,128,0.4076
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,256,0.4124
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,512,0.4867
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,1024,0.6318
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,2048,0.9098
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,4096,1.4546
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,8192,2.5411
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,16384,5.2250
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,32768,10.7679
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,128,0.0993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,256,0.0975
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,1024,0.1025
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,512,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,2048,0.1019
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,4096,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,16384,0.1125
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,8192,0.1023
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,65536,0.1337
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,32768,0.1236
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,131072,0.1472
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,128,0.1014
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,256,0.1005
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,512,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,1024,0.1039
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,2048,0.1027
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,4096,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,8192,0.1065
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,16384,0.1210
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,32768,0.1384
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,65536,0.1490
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,131072,0.1752
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,256,0.1011
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,128,0.1016
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,512,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,1024,0.1038
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,4096,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,2048,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,8192,0.1201
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,16384,0.1334
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,32768,0.1460
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,65536,0.1724
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,131072,0.2264
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,128,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,256,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,512,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,1024,0.1143
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,2048,0.1129
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,4096,0.1229
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,8192,0.1353
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,16384,0.1488
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,32768,0.1763
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,65536,0.2292
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,131072,0.3292
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,128,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,256,0.1113
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,512,0.1163
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,1024,0.1192
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,2048,0.1260
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,4096,0.1381
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,8192,0.1525
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,16384,0.1790
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,32768,0.2348
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,65536,0.3386
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,131072,0.5376
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,128,0.1162
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,512,0.1213
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,256,0.1182
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,2048,0.1411
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,1024,0.1295
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,8192,0.1823
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,4096,0.1545
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,16384,0.2347
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,32768,0.3427
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,65536,0.5497
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,131072,0.9669
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,128,0.1197
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,256,0.1259
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,512,0.1299
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,2048,0.1612
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,1024,0.1452
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,4096,0.1882
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,8192,0.2396
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,16384,0.3467
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,32768,0.5550
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,65536,0.9682
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,131072,1.7828
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,128,0.1383
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,256,0.1366
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,512,0.1500
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,1024,0.1621
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,2048,0.1905
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,4096,0.2421
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,8192,0.3440
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,16384,0.5520
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,32768,0.9633
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,65536,1.7784
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,131072,3.4230
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,128,0.1598
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,256,0.1653
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,512,0.1804
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,1024,0.2086
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,2048,0.2581
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,8192,0.5711
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,4096,0.3614
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,16384,1.0013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,32768,1.8187
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,65536,3.4612
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,131072,6.9679
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,128,0.2016
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,256,0.2082
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,512,0.2398
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,2048,0.3964
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,4096,0.5998
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,1024,0.2886
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,16384,1.8890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,8192,1.0188
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,32768,3.5259
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,65536,7.0112
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,128,0.2889
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,256,0.3022
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,512,0.3552
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,2048,0.6464
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,4096,1.0428
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,1024,0.4523
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,8192,1.8420
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,16384,3.4737
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,32768,6.7495
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,128,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,256,0.0942
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,512,0.0993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,1024,0.0979
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,4096,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,2048,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,16384,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,32768,0.1173
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,8192,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,65536,0.1300
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,131072,0.1462
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,128,0.0996
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,256,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,512,0.0988
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,1024,0.1015
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,2048,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,8192,0.1061
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,4096,0.0989
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,16384,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,32768,0.1318
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,65536,0.1472
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,131072,0.1712
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,128,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,256,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,512,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,4096,0.1038
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,1024,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,8192,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,2048,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,16384,0.1285
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,32768,0.1449
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,65536,0.1671
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,131072,0.2205
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,128,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,256,0.1040
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,512,0.1082
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,2048,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,4096,0.1177
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,1024,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,8192,0.1258
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,16384,0.1463
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,32768,0.1725
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,65536,0.2294
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,131072,0.3298
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,128,0.1106
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,512,0.1114
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,1024,0.1105
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,256,0.1120
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,2048,0.1223
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,4096,0.1319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,8192,0.1504
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,16384,0.1760
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,32768,0.2308
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,65536,0.3337
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,131072,0.5355
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,128,0.1117
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,256,0.1137
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,512,0.1176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,1024,0.1193
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,2048,0.1337
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,4096,0.1531
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,16384,0.2292
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,8192,0.1763
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,32768,0.3358
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,65536,0.5438
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,131072,0.9604
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,128,0.1172
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,256,0.1175
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,512,0.1211
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,1024,0.1381
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,2048,0.1544
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,4096,0.1800
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,8192,0.2323
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,16384,0.3385
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,32768,0.5468
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,65536,0.9604
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,131072,1.7775
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,128,0.1239
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,256,0.1239
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,1024,0.1532
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,512,0.1415
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,2048,0.1818
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,4096,0.2337
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,8192,0.3391
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,16384,0.5517
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,32768,0.9529
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,65536,1.7662
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,131072,3.3985
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,128,0.1450
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,256,0.1514
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,512,0.1677
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,1024,0.1930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,2048,0.2421
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,4096,0.3497
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,8192,0.5625
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,16384,0.9864
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,32768,1.7967
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,65536,3.4418
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,131072,6.7945
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,128,0.1771
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,256,0.1873
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,512,0.2126
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,1024,0.2658
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,2048,0.3686
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,4096,0.5776
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,8192,0.9927
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,16384,1.8594
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,32768,3.5192
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,65536,6.8930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,256,0.2526
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,512,0.3039
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,2048,0.5959
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,128,0.2399
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,1024,0.4007
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,4096,0.9920
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,8192,1.7843
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,16384,3.4159
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,32768,6.6963
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,128,0.0913
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,512,0.0874
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,256,0.0894
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,1024,0.0937
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,2048,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,4096,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,8192,0.0925
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,16384,0.1002
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,32768,0.1132
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,65536,0.1260
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,131072,0.1382
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,128,0.0975
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,256,0.0932
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,512,0.0999
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,1024,0.1009
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,2048,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,16384,0.1042
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,4096,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,8192,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,32768,0.1288
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,65536,0.1442
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,131072,0.1743
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,128,0.0962
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,256,0.0997
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,512,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,1024,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,2048,0.1034
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,4096,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,8192,0.1128
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,16384,0.1258
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,32768,0.1402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,131072,0.2225
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,65536,0.1657
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,128,0.1017
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,512,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,256,0.1034
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,1024,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,2048,0.1040
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,4096,0.1146
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,8192,0.1278
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,16384,0.1426
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,32768,0.1731
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,65536,0.2223
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,131072,0.3264
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,128,0.1077
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,256,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,512,0.1084
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,1024,0.1085
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,2048,0.1158
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,4096,0.1297
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,8192,0.1425
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,16384,0.1727
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,32768,0.2295
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,65536,0.3299
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,131072,0.5247
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,128,0.1102
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,256,0.1116
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,1024,0.1192
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,512,0.1116
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,2048,0.1306
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,4096,0.1463
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,8192,0.1743
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,16384,0.2267
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,65536,0.5428
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,32768,0.3312
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,131072,0.9546
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,128,0.1157
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,256,0.1132
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,512,0.1195
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,1024,0.1312
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,4096,0.1751
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,2048,0.1524
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,8192,0.2302
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,16384,0.3338
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,32768,0.5490
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,65536,0.9523
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,131072,1.7772
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,128,0.1184
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,512,0.1325
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,256,0.1208
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,2048,0.1803
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,1024,0.1510
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,4096,0.2313
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,32768,0.9517
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,131072,3.3881
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,8192,0.3344
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,16384,0.5460
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,65536,1.7600
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,128,0.1361
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,256,0.1453
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,512,0.1625
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,2048,0.2395
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,1024,0.1858
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,4096,0.3444
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,16384,0.9677
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,8192,0.5506
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,32768,1.7962
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,65536,3.4266
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,131072,6.8152
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,128,0.1683
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,256,0.1756
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,512,0.2026
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,1024,0.2555
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,2048,0.3554
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,4096,0.5638
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,8192,0.9845
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,16384,1.8407
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,32768,3.4852
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,65536,6.8784
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,128,0.2194
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,256,0.2316
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,512,0.2854
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,1024,0.3776
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,2048,0.5699
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,4096,0.9664
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,8192,1.7627
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,16384,3.3817
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,32768,6.6095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,128,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,256,0.0875
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,512,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,1024,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,2048,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,4096,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,8192,0.0909
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,16384,0.1007
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,32768,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,65536,0.1255
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,131072,0.1414
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,128,0.0870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,256,0.0913
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,512,0.0944
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,1024,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,2048,0.0932
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,4096,0.0924
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,8192,0.1014
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,16384,0.1034
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,32768,0.1177
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,65536,0.1428
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,131072,0.1678
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,128,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,256,0.0975
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,512,0.0996
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,1024,0.0993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,2048,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,4096,0.1034
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,8192,0.1102
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,16384,0.1173
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,32768,0.1425
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,65536,0.1663
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,131072,0.2187
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,128,0.1032
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,256,0.1022
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,512,0.1007
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,1024,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,2048,0.1077
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,4096,0.1097
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,8192,0.1247
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,16384,0.1367
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,32768,0.1694
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,65536,0.2219
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,131072,0.3175
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,128,0.1068
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,2048,0.1136
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,512,0.1102
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,1024,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,4096,0.1264
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,256,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,8192,0.1412
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,16384,0.1734
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,32768,0.2250
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,65536,0.3320
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,131072,0.5315
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,128,0.1078
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,256,0.1118
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,512,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,1024,0.1177
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,2048,0.1255
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,4096,0.1438
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,8192,0.1749
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,16384,0.2248
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,32768,0.3316
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,65536,0.5412
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,131072,0.9492
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,128,0.1141
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,256,0.1125
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,512,0.1178
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,1024,0.1283
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,2048,0.1473
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,4096,0.1766
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,8192,0.2272
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,16384,0.3356
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,32768,0.5449
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,65536,0.9528
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,131072,1.7734
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,128,0.1184
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,256,0.1180
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,512,0.1319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,2048,0.1748
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,1024,0.1481
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,8192,0.3339
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,4096,0.2292
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,16384,0.5444
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,32768,0.9474
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,65536,1.7560
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,131072,3.3904
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,128,0.1358
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,256,0.1401
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,2048,0.2360
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,1024,0.1832
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,4096,0.3407
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,512,0.1575
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,8192,0.5435
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,16384,0.9691
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,32768,1.7833
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,65536,3.4266
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,131072,6.7937
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,128,0.1641
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,512,0.1984
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,8192,0.9825
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,2048,0.3503
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,4096,0.5573
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,1024,0.2492
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,256,0.1702
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,16384,1.8365
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,32768,3.4968
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,65536,6.8628
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,128,0.2103
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,256,0.2241
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,512,0.2729
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,2048,0.5599
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,4096,0.9574
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,1024,0.3687
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,16384,3.3637
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,8192,1.7496
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,32768,6.6190
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,128,0.0872
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,512,0.0854
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,1024,0.0934
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,2048,0.0884
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,4096,0.0911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,8192,0.0949
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,16384,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,32768,0.1080
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,256,0.0872
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,65536,0.1202
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,131072,0.1388
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,128,0.0850
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,256,0.0914
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,512,0.0909
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,1024,0.0912
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,2048,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,4096,0.0912
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,8192,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,16384,0.1017
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,32768,0.1220
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,131072,0.1661
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,65536,0.1398
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,128,0.0932
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,256,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,512,0.0911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,1024,0.0983
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,2048,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,4096,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,8192,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,16384,0.1179
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,32768,0.1368
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,65536,0.1576
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,131072,0.2100
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,256,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,128,0.0953
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,512,0.1001
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,1024,0.1034
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,2048,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,4096,0.1067
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,8192,0.1202
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,16384,0.1399
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,32768,0.1619
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,131072,0.3202
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,65536,0.2213
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,128,0.1017
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,256,0.1038
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,512,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,2048,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,4096,0.1251
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,1024,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,8192,0.1412
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,16384,0.1671
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,32768,0.2170
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,65536,0.3300
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,131072,0.5267
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,128,0.1077
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,256,0.1055
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,512,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,1024,0.1114
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,2048,0.1218
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,4096,0.1439
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,8192,0.1668
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,16384,0.2197
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,32768,0.3318
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,65536,0.5379
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,131072,0.9495
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,128,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,256,0.1129
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,512,0.1114
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,1024,0.1237
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,2048,0.1453
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,4096,0.1721
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,8192,0.2206
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,16384,0.3330
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,32768,0.5417
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,65536,0.9521
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,131072,1.7640
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,128,0.1147
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,256,0.1159
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,512,0.1244
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,1024,0.1462
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,2048,0.1702
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,4096,0.2265
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,8192,0.3330
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,16384,0.5391
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,32768,0.9457
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,65536,1.7521
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,131072,3.3991
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,128,0.1279
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,256,0.1365
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,512,0.1543
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,1024,0.1773
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,2048,0.2327
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,4096,0.3330
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,8192,0.5428
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,16384,0.9626
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,32768,1.7818
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,65536,3.4090
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,131072,6.7746
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,128,0.1566
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,256,0.1653
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,512,0.1933
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,1024,0.2420
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,2048,0.3478
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,4096,0.5516
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,8192,0.9828
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,16384,1.8287
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,32768,3.4882
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,65536,6.8742
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,128,0.2040
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,256,0.2160
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,2048,0.5552
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,4096,0.9518
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,512,0.2650
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,8192,1.7561
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,1024,0.3604
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,16384,3.3781
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,32768,6.5425
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,128,0.1495
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,256,0.1303
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,512,0.1388
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,1024,0.1355
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,2048,0.1438
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,4096,0.1442
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,16384,0.1537
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,8192,0.1461
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,32768,0.1588
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,65536,0.1557
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,131072,0.1686
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,128,0.1344
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,256,0.1384
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,2048,0.1422
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,4096,0.1488
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,1024,0.1421
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,16384,0.1560
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,512,0.1405
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,8192,0.1554
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,65536,0.1647
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,32768,0.1555
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,131072,0.1751
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,128,0.1390
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,256,0.1465
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,1024,0.1499
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,2048,0.1463
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,4096,0.1509
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,8192,0.1552
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,512,0.1458
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,16384,0.1609
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,32768,0.1634
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,65536,0.1832
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,131072,0.2060
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,128,0.1445
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,512,0.1472
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,256,0.1428
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,1024,0.1521
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,8192,0.1635
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,2048,0.1551
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,4096,0.1578
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,16384,0.1659
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,32768,0.1794
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,65536,0.2046
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,131072,0.2520
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,128,0.1549
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,256,0.1564
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,512,0.1574
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,1024,0.1621
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,4096,0.1697
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,8192,0.1751
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,2048,0.1667
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,16384,0.1899
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,32768,0.2166
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,65536,0.2737
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,131072,0.3866
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,128,0.1692
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,512,0.1779
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,1024,0.1773
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,256,0.1691
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,2048,0.1809
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,4096,0.1879
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,8192,0.2021
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,16384,0.2282
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,32768,0.2875
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,65536,0.4103
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,131072,0.6520
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,256,0.1934
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,2048,0.2054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,1024,0.1983
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,4096,0.2182
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,512,0.1950
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,128,0.1927
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,8192,0.2457
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,16384,0.3047
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,32768,0.4209
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,131072,1.1899
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,65536,0.6848
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,128,0.2458
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,256,0.2474
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,2048,0.2741
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,1024,0.2610
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,4096,0.3016
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,512,0.2534
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,8192,0.3592
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,16384,0.4786
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,32768,0.7381
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,65536,1.2467
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,131072,2.3518
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,128,0.3603
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,256,0.3656
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,512,0.3705
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,2048,0.4139
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,1024,0.3847
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,4096,0.4742
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,8192,0.5947
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,16384,0.8594
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,32768,1.3766
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,65536,2.5322
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,131072,4.8743
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,128,0.5676
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,256,0.5726
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,512,0.5871
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,2048,0.6708
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,1024,0.6138
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,4096,0.7851
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,8192,1.0275
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,16384,1.5096
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,32768,2.5437
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,65536,4.9728
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,128,1.0266
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,256,1.0341
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,512,1.0638
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,2048,1.2393
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,4096,1.4705
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,1024,1.1195
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,8192,1.9939
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,16384,2.9884
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,32768,5.0623
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,128,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,256,0.1173
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,512,0.1173
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,1024,0.1210
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,2048,0.1209
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,8192,0.1219
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,4096,0.1335
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,16384,0.1244
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,32768,0.1341
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,65536,0.1402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,131072,0.1466
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,128,0.1198
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,512,0.1220
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,256,0.1198
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,1024,0.1239
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,2048,0.1216
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,4096,0.1275
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,8192,0.1261
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,16384,0.1375
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,32768,0.1453
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,131072,0.1620
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,65536,0.1515
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,128,0.1220
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,256,0.1239
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,1024,0.1244
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,4096,0.1319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,2048,0.1258
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,512,0.1241
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,8192,0.1378
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,16384,0.1425
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,32768,0.1462
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,65536,0.1595
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,131072,0.1794
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,128,0.1257
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,256,0.1279
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,512,0.1279
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,1024,0.1279
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,2048,0.1343
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,4096,0.1377
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,8192,0.1424
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,16384,0.1466
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,32768,0.1600
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,65536,0.1858
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,131072,0.2282
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,512,0.1361
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,128,0.1319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,1024,0.1387
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,256,0.1340
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,2048,0.1423
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,4096,0.1472
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,8192,0.1524
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,16384,0.1644
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,32768,0.1894
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,131072,0.3315
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,65536,0.2307
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,128,0.1421
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,256,0.1422
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,512,0.1436
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,1024,0.1472
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,2048,0.1517
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,4096,0.1589
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,8192,0.1706
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,16384,0.1964
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,32768,0.2456
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,65536,0.3497
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,131072,0.5528
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,128,0.1515
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,512,0.1613
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,256,0.1525
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,2048,0.1732
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,4096,0.1833
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,1024,0.1644
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,8192,0.2098
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,16384,0.2596
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,32768,0.3601
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,131072,0.9833
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,65536,0.5692
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,128,0.1804
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,256,0.1834
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,512,0.1862
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,1024,0.1920
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,2048,0.2044
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,4096,0.2312
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,8192,0.2794
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,16384,0.3820
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,32768,0.5985
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,65536,1.0100
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,131072,1.8571
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,128,0.2412
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,256,0.2427
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,512,0.2491
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,1024,0.2636
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,2048,0.2911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,4096,0.3411
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,8192,0.4462
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,16384,0.6637
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,32768,1.0835
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,65536,1.9721
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,131072,3.7278
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,128,0.3519
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,256,0.3575
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,512,0.3701
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,1024,0.3980
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,2048,0.4529
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,4096,0.5579
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,8192,0.7821
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,16384,1.2150
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,32768,2.0801
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,65536,3.8737
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,128,0.5858
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,256,0.6004
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,512,0.6250
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,2048,0.7756
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,1024,0.6751
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4096,0.9796
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8192,1.4035
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16384,2.2048
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32768,3.8767
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,128,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,256,0.1017
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,512,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,2048,0.1035
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,1024,0.1056
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,8192,0.1093
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,4096,0.1055
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,16384,0.1152
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,65536,0.1257
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,32768,0.1209
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,131072,0.1403
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,256,0.1093
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,128,0.1077
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,512,0.1049
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,1024,0.1034
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,2048,0.1124
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,4096,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,8192,0.1145
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,16384,0.1220
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,32768,0.1299
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,65536,0.1383
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,131072,0.1524
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,128,0.1105
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,256,0.1077
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,512,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,1024,0.1125
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,2048,0.1092
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,4096,0.1175
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,8192,0.1224
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,16384,0.1302
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,32768,0.1421
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,65536,0.1551
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,131072,0.1843
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,128,0.1114
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,256,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,512,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,1024,0.1148
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,2048,0.1181
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,4096,0.1224
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,8192,0.1320
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,16384,0.1438
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,32768,0.1586
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,65536,0.1907
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,131072,0.2500
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,128,0.1176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,256,0.1157
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,2048,0.1258
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,1024,0.1238
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,4096,0.1345
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,512,0.1200
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,8192,0.1459
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,16384,0.1616
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,32768,0.1958
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,65536,0.2658
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,131072,0.4086
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,128,0.1239
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,256,0.1211
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,512,0.1258
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,1024,0.1308
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,2048,0.1395
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,4096,0.1503
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,8192,0.1660
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,16384,0.2021
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,32768,0.2723
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,65536,0.4091
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,131072,0.6957
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,128,0.1311
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,256,0.1322
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,512,0.1360
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,2048,0.1523
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,1024,0.1438
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,4096,0.1694
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,8192,0.2054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,16384,0.2765
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,32768,0.4156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,65536,0.6994
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,131072,1.2576
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,128,0.1521
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,256,0.1544
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,512,0.1592
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,2048,0.1851
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,1024,0.1655
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,4096,0.2209
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,8192,0.2924
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,16384,0.4321
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,32768,0.7241
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,65536,1.2976
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,131072,2.4921
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,128,0.1873
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,256,0.1886
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,512,0.1961
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,1024,0.2153
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,2048,0.2520
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,4096,0.3224
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,8192,0.4622
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,16384,0.7508
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,65536,2.6031
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,32768,1.3112
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,131072,5.3104
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,128,0.2585
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,256,0.2576
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,512,0.2740
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,1024,0.3071
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,2048,0.3715
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,4096,0.4995
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,16384,1.2649
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,8192,0.7592
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,32768,2.3021
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,65536,4.8346
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,128,0.4047
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,256,0.4064
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,512,0.4396
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,2048,0.6327
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,1024,0.5031
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4096,0.8914
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8192,1.4011
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16384,2.3939
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32768,4.8823
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,128,0.0961
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,512,0.0993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,1024,0.1022
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,256,0.0932
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,2048,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,4096,0.1023
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,8192,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,16384,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,32768,0.1119
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,65536,0.1199
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,131072,0.1303
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,128,0.0993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,512,0.0993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,1024,0.1032
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,2048,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,256,0.0958
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,4096,0.1037
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,8192,0.1027
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,32768,0.1200
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,16384,0.1114
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,65536,0.1298
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,131072,0.1443
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,128,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,256,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,512,0.0993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,1024,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,2048,0.1035
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,4096,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,8192,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,16384,0.1143
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,32768,0.1278
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,65536,0.1403
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,131072,0.1585
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,256,0.1031
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,128,0.1073
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,512,0.1078
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,1024,0.1114
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,2048,0.1113
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,4096,0.1139
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,8192,0.1158
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,16384,0.1303
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,32768,0.1425
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,65536,0.1627
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,131072,0.2096
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,128,0.1080
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,256,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,512,0.1148
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,1024,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,2048,0.1179
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,4096,0.1196
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,8192,0.1333
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,16384,0.1460
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,32768,0.1665
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,65536,0.2131
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,131072,0.2984
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,128,0.1137
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,256,0.1166
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,512,0.1187
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,1024,0.1192
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,2048,0.1218
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,4096,0.1336
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,8192,0.1475
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,16384,0.1679
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,32768,0.2165
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,65536,0.3066
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,131072,0.4868
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,128,0.1197
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,256,0.1184
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,512,0.1240
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,1024,0.1278
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,2048,0.1381
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,4096,0.1528
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,8192,0.1733
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,16384,0.2219
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,65536,0.4910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,32768,0.3101
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,131072,0.8485
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,128,0.1290
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,256,0.1299
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,512,0.1352
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,2048,0.1585
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,1024,0.1456
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,8192,0.2254
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,4096,0.1816
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,16384,0.3159
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,32768,0.4983
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,65536,0.8540
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,131072,1.5749
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,128,0.1519
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,256,0.1560
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,512,0.1647
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,1024,0.1750
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,2048,0.1990
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,4096,0.2458
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,16384,0.5229
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,8192,0.3371
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,32768,0.8806
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,65536,1.5985
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,131072,3.0335
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,128,0.1948
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,256,0.1962
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,512,0.2097
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,1024,0.2320
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,2048,0.2808
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,4096,0.3742
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,8192,0.5619
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,16384,0.9277
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,32768,1.6720
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,65536,3.1553
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,128,0.2782
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,256,0.2821
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,512,0.3048
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,2048,0.4387
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,1024,0.3489
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4096,0.6180
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8192,0.9700
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16384,1.6518
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32768,3.0712
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,128,0.0889
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,256,0.0965
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,512,0.0963
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,1024,0.0970
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,4096,0.0990
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,2048,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,8192,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,16384,0.1036
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,32768,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,65536,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,131072,0.1246
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,128,0.0993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,256,0.0980
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,512,0.0974
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,1024,0.0994
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,2048,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,4096,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,8192,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,16384,0.1044
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,65536,0.1277
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,32768,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,131072,0.1387
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,128,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,256,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,512,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,1024,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,2048,0.1007
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,4096,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,8192,0.1051
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,32768,0.1252
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,16384,0.1118
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,65536,0.1344
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,131072,0.1560
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,128,0.1020
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,256,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,512,0.0994
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,1024,0.1060
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,2048,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,4096,0.1070
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,8192,0.1139
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,16384,0.1277
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,32768,0.1381
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,65536,0.1616
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,131072,0.2002
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,128,0.1096
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,256,0.1052
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,512,0.1109
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,1024,0.1130
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,2048,0.1114
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,4096,0.1179
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,8192,0.1311
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,16384,0.1406
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,32768,0.1640
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,65536,0.2041
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,131072,0.2919
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,128,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,256,0.1125
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,512,0.1136
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,1024,0.1165
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,2048,0.1161
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,4096,0.1301
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,8192,0.1419
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,16384,0.1646
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,32768,0.2124
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,65536,0.2997
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,131072,0.4800
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,128,0.1136
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,256,0.1144
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,512,0.1176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,1024,0.1194
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,2048,0.1338
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,4096,0.1444
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,8192,0.1680
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,16384,0.2144
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,32768,0.3028
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,131072,0.8418
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,65536,0.4834
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,128,0.1198
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,512,0.1264
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,256,0.1198
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,2048,0.1488
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,4096,0.1712
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,1024,0.1363
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,8192,0.2179
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,16384,0.3054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,65536,0.8425
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,32768,0.4866
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,131072,1.5574
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,256,0.1378
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,512,0.1465
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,2048,0.1839
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,1024,0.1609
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,128,0.1344
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,4096,0.2319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,8192,0.3198
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,16384,0.5026
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,32768,0.8618
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,65536,1.5806
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,131072,3.0442
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,128,0.1668
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,256,0.1688
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,512,0.1822
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,1024,0.2056
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,4096,0.3440
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,2048,0.2534
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,8192,0.5324
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,16384,0.8966
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,32768,1.6262
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,65536,3.0842
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,128,0.2207
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,256,0.2276
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,512,0.2494
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,2048,0.3832
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4096,0.5589
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,1024,0.2935
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16384,1.5891
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8192,0.9039
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32768,2.9535
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,128,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,512,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,256,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,1024,0.0933
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,2048,0.0911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,4096,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,8192,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,16384,0.1002
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,32768,0.0978
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,65536,0.1117
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,131072,0.1220
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,128,0.0935
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,256,0.0914
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,512,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,1024,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,2048,0.0920
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,4096,0.0970
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,8192,0.1003
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,16384,0.1015
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,32768,0.1088
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,65536,0.1193
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,131072,0.1411
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,128,0.0954
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,256,0.0993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,512,0.1001
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,1024,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,2048,0.0981
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,4096,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,8192,0.1034
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,16384,0.1082
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,32768,0.1183
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,65536,0.1363
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,131072,0.1576
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,128,0.1035
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,256,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,1024,0.1004
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,512,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,2048,0.1071
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,4096,0.1058
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,8192,0.1117
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,16384,0.1221
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,32768,0.1384
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,131072,0.1986
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,65536,0.1605
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,128,0.1031
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,256,0.1097
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,512,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,1024,0.1118
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,2048,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,4096,0.1114
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,8192,0.1239
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,16384,0.1384
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,32768,0.1636
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,65536,0.2017
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,131072,0.2899
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,128,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,256,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,512,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,2048,0.1176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,1024,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,4096,0.1239
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,8192,0.1405
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,16384,0.1640
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,32768,0.2057
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,65536,0.2968
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,131072,0.4793
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,128,0.1129
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,512,0.1136
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,1024,0.1179
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,2048,0.1279
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,4096,0.1424
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,256,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,8192,0.1670
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,16384,0.2098
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,32768,0.3022
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,65536,0.4810
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,131072,0.8392
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,128,0.1166
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,256,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,1024,0.1316
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,512,0.1213
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,2048,0.1463
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,4096,0.1691
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,8192,0.2126
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,16384,0.3038
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,32768,0.4836
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,65536,0.8380
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,131072,1.5502
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,256,0.1311
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,512,0.1401
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,1024,0.1530
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,2048,0.1790
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,128,0.1302
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,4096,0.2221
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,8192,0.3144
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,16384,0.4947
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,32768,0.8534
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,65536,1.5720
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,131072,3.0600
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,256,0.1586
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,512,0.1689
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,128,0.1544
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,1024,0.1941
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,2048,0.2404
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,4096,0.3331
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,8192,0.5175
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,16384,0.8817
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,32768,1.6089
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,65536,3.0806
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,128,0.1983
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,256,0.2045
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,512,0.2281
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,1024,0.2716
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,2048,0.3588
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,4096,0.5338
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,8192,0.8771
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,16384,1.5578
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,32768,2.9390
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,128,0.0839
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,256,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,512,0.0904
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,1024,0.0903
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,2048,0.0914
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,4096,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,8192,0.0891
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,16384,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,32768,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,65536,0.1065
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,131072,0.1216
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,128,0.0921
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,256,0.0906
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,512,0.0929
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,1024,0.0932
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,2048,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,4096,0.0932
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,8192,0.0922
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,16384,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,32768,0.1052
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,65536,0.1183
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,131072,0.1339
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,128,0.0976
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,256,0.0980
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,512,0.0970
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,1024,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,2048,0.0975
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,4096,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,16384,0.1058
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,8192,0.0996
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,32768,0.1179
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,65536,0.1340
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,131072,0.1541
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,128,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,256,0.1014
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,512,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,1024,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,2048,0.1009
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,4096,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,8192,0.1057
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,16384,0.1192
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,32768,0.1363
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,65536,0.1590
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,131072,0.2029
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,128,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,256,0.1057
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,512,0.1071
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,1024,0.1077
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,2048,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,8192,0.1207
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,4096,0.1138
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,16384,0.1414
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,32768,0.1602
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,65536,0.2029
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,131072,0.2880
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,128,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,256,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,512,0.1092
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,1024,0.1077
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,2048,0.1116
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,4096,0.1247
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,8192,0.1400
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,16384,0.1609
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,32768,0.2063
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,65536,0.2964
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,131072,0.4755
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,128,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,256,0.1117
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,1024,0.1159
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,2048,0.1221
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,512,0.1117
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,4096,0.1402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,8192,0.1649
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,16384,0.2106
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,32768,0.3008
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,65536,0.4793
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,131072,0.8350
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,128,0.1137
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,256,0.1155
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,1024,0.1285
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,2048,0.1441
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,512,0.1176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,4096,0.1662
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,8192,0.2113
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,16384,0.3028
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,32768,0.4786
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,65536,0.8366
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,131072,1.5454
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,128,0.1258
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,1024,0.1500
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,256,0.1279
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,2048,0.1729
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,4096,0.2199
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,512,0.1364
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,8192,0.3100
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,16384,0.4893
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,32768,0.8516
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,65536,1.5663
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,131072,3.0054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,128,0.1477
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,256,0.1516
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,512,0.1637
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,1024,0.1874
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,2048,0.2341
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,4096,0.3283
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,8192,0.5081
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,16384,0.8738
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,32768,1.6008
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,65536,3.0585
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,256,0.1930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,128,0.1880
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,2048,0.3447
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,512,0.2154
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,1024,0.2589
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,4096,0.5209
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,8192,0.8615
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,16384,1.5413
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,32768,2.9141
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,128,0.0960
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,256,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,512,0.0960
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,1024,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,2048,0.0974
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,4096,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,16384,0.0983
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,8192,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,32768,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,65536,0.1143
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,131072,0.1259
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,128,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,256,0.0974
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,512,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,1024,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,2048,0.0985
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,4096,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,8192,0.1039
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,16384,0.1052
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,32768,0.1113
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,131072,0.1362
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,65536,0.1275
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,256,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,512,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,128,0.1014
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,1024,0.1036
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,2048,0.0974
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,4096,0.1039
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,8192,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,16384,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,32768,0.1182
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,65536,0.1360
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,131072,0.1579
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,128,0.1072
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,256,0.1646
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,512,0.1080
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,1024,0.1055
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,2048,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,4096,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,8192,0.1155
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,16384,0.1209
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,32768,0.1419
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,65536,0.1639
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,131072,0.2062
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,128,0.1119
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,512,0.1099
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,1024,0.1116
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,2048,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,4096,0.1197
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,256,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,8192,0.1283
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,16384,0.1402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,32768,0.1667
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,65536,0.2079
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,131072,0.2925
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,128,0.1118
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,256,0.1122
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,512,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,1024,0.1193
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,2048,0.1197
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,4096,0.1291
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,16384,0.1644
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,8192,0.1427
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,32768,0.2109
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,65536,0.3003
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,131072,0.4796
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,256,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,128,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,512,0.1177
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,1024,0.1221
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,2048,0.1306
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,8192,0.1665
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,4096,0.1452
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,16384,0.2133
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,32768,0.3032
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,65536,0.4793
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,131072,0.8442
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,128,0.1184
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,256,0.1219
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,512,0.1239
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,1024,0.1341
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,2048,0.1491
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,4096,0.1693
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,8192,0.2178
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,16384,0.3084
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,32768,0.4866
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,65536,0.8416
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,131072,1.5534
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,128,0.1301
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,256,0.1334
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,512,0.1421
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,2048,0.1813
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,1024,0.1564
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,4096,0.2242
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,8192,0.3163
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,16384,0.4959
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,32768,0.8503
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,65536,1.5690
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,131072,3.0160
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,128,0.1504
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,256,0.1555
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,512,0.1688
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,2048,0.2394
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,1024,0.1913
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,4096,0.3305
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,8192,0.5112
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,16384,0.8742
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,32768,1.6052
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,65536,3.0636
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,128,0.1908
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,256,0.1956
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,2048,0.3480
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,512,0.2187
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,1024,0.2621
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,4096,0.5197
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,16384,1.5424
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,8192,0.8596
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,32768,2.9254
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,128,0.1240
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,256,0.1224
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,512,0.1222
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,1024,0.1259
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,4096,0.1340
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,8192,0.1362
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,2048,0.1299
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,16384,0.1514
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,32768,0.1464
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,65536,0.1506
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,131072,0.1611
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,128,0.1281
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,256,0.1263
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,512,0.1306
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,1024,0.1359
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,2048,0.1361
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,4096,0.1448
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,16384,0.1442
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,8192,0.1501
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,32768,0.1523
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,65536,0.1641
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,131072,0.1892
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,128,0.1295
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,256,0.1301
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,512,0.1343
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,1024,0.1407
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,2048,0.1449
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,4096,0.1553
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,8192,0.1455
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,16384,0.1503
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,32768,0.1638
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,65536,0.1901
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,131072,0.2394
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,128,0.1403
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,256,0.1377
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,512,0.1425
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,1024,0.1517
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,2048,0.1597
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,4096,0.1496
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,8192,0.1522
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,16384,0.1672
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,32768,0.1939
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,65536,0.2434
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,131072,0.3465
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,128,0.1501
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,512,0.1581
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,256,0.1506
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,1024,0.1624
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,2048,0.1565
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,4096,0.1610
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,8192,0.1786
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,16384,0.2038
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,32768,0.2580
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,65536,0.3704
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,131072,0.6035
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,128,0.1557
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,256,0.1548
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,512,0.1609
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,2048,0.1714
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,1024,0.1664
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,8192,0.2139
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,4096,0.1892
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,16384,0.2698
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,32768,0.3807
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,65536,0.6140
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,131072,1.0657
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,128,0.1722
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,256,0.1752
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,512,0.1767
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,1024,0.1845
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,2048,0.1985
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,4096,0.2278
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,8192,0.2801
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,16384,0.3902
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,32768,0.6260
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,65536,1.0756
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,131072,2.0151
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,128,0.2138
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,256,0.2166
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,1024,0.2391
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,512,0.2215
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,4096,0.3232
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,2048,0.2665
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,8192,0.4337
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,16384,0.6708
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,32768,1.1247
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,65536,2.0590
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,131072,4.2258
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,128,0.2921
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,256,0.3000
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,512,0.3150
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,1024,0.3455
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,2048,0.4001
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,4096,0.5131
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,8192,0.7499
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,16384,1.2094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,32768,2.1684
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,65536,4.1828
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,131072,8.7640
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,128,0.4734
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,512,0.5149
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,256,0.4876
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,2048,0.6790
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,4096,0.8902
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,1024,0.5701
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,8192,1.3176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,16384,2.1929
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,32768,4.0309
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,65536,8.4512
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,128,0.8289
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,256,0.8547
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,512,0.9073
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,2048,1.2335
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,1024,1.0195
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,4096,1.6635
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,16384,4.4143
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,8192,2.5826
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,32768,8.4159
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,128,0.0982
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,256,0.0914
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,512,0.1005
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,1024,0.0986
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,2048,0.1055
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,4096,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,8192,0.1145
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,16384,0.1195
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,32768,0.1402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,65536,0.1335
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,131072,0.1534
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,128,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,256,0.1016
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,512,0.1055
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,1024,0.1014
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,2048,0.1073
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,4096,0.1136
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,8192,0.1217
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,16384,0.1381
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,32768,0.1427
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,65536,0.1579
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,131072,0.1861
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,128,0.1070
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,256,0.1037
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,512,0.1036
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,1024,0.1114
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,2048,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,4096,0.1258
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,8192,0.1375
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,32768,0.1550
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,16384,0.1405
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,65536,0.1803
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,131072,0.2349
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,128,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,256,0.1071
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,512,0.1153
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,1024,0.1199
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,2048,0.1316
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,4096,0.1443
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,8192,0.1385
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,16384,0.1574
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,32768,0.1857
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,65536,0.2344
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,131072,0.3448
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,128,0.1166
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,256,0.1191
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,512,0.1253
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,1024,0.1339
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,2048,0.1494
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,4096,0.1469
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,8192,0.1621
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,16384,0.1877
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,32768,0.2411
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,65536,0.3517
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,131072,0.5527
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,128,0.1300
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,256,0.1320
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,512,0.1381
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,1024,0.1533
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,2048,0.1509
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,4096,0.1647
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,8192,0.1958
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,16384,0.2453
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,32768,0.3570
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,65536,0.5737
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,131072,0.9918
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,128,0.1384
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,256,0.1422
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,1024,0.1626
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,2048,0.1771
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,512,0.1546
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,4096,0.2027
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,8192,0.2558
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,16384,0.3684
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,32768,0.5847
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,65536,1.0030
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,131072,1.8577
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,128,0.1627
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,256,0.1677
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,512,0.1749
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,1024,0.1894
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,2048,0.2170
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,4096,0.2737
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,8192,0.3787
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,16384,0.5924
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,32768,1.0177
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,65536,1.8904
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,131072,3.7412
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,128,0.2058
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,256,0.2151
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,512,0.2323
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,2048,0.3118
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,1024,0.2606
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,4096,0.4214
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,8192,0.6391
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,16384,1.0866
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,32768,1.9599
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,65536,3.8430
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,131072,7.7982
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,128,0.3024
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,256,0.3164
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,512,0.3464
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,1024,0.4023
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,4096,0.7308
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,2048,0.5095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,8192,1.1970
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,16384,2.1125
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,32768,4.0251
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,65536,7.9207
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,128,0.5015
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,256,0.5264
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,512,0.5816
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,2048,0.8865
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,4096,1.3176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,1024,0.6810
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,8192,2.1483
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,16384,4.0288
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,32768,7.7070
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,128,0.0892
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,256,0.0870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,512,0.0933
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,1024,0.0932
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,2048,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,8192,0.1005
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,4096,0.0975
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,16384,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,32768,0.1155
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,65536,0.1322
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,131072,0.1553
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,128,0.0934
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,256,0.0891
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,512,0.0991
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,1024,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,2048,0.0990
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,4096,0.0991
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,8192,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,16384,0.1180
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,65536,0.1459
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,32768,0.1350
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,131072,0.1900
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,128,0.0970
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,256,0.0991
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,512,0.0953
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,1024,0.0996
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,2048,0.1043
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,4096,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,8192,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,16384,0.1314
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,32768,0.1543
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,65536,0.1935
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,131072,0.2579
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,128,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,256,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,1024,0.1073
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,2048,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,512,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,4096,0.1161
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,8192,0.1351
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,16384,0.1573
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,32768,0.1967
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,65536,0.2619
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,131072,0.4041
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,128,0.1057
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,256,0.1076
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,512,0.1102
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,1024,0.1155
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,2048,0.1196
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,4096,0.1434
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,8192,0.1637
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,16384,0.1992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,32768,0.2763
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,65536,0.4339
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,131072,0.7407
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,128,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,256,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,512,0.1200
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,1024,0.1271
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,2048,0.1487
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,4096,0.1647
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,8192,0.2043
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,16384,0.2808
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,32768,0.4376
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,65536,0.7472
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,131072,1.3712
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,128,0.1215
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,256,0.1237
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,512,0.1344
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,1024,0.1479
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,2048,0.1667
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,4096,0.2054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,8192,0.2817
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,16384,0.4355
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,32768,0.7385
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,65536,1.3593
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,131072,2.5745
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,128,0.1388
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,256,0.1481
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,512,0.1606
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,2048,0.2194
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,4096,0.2948
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,1024,0.1792
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,8192,0.4528
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,16384,0.7555
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,32768,1.3992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,65536,2.6950
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,131072,5.3209
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,128,0.1769
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,256,0.1805
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,512,0.2019
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,1024,0.2408
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,2048,0.3196
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,4096,0.4705
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,8192,0.7718
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,16384,1.4009
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,32768,2.7230
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,65536,5.3578
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,131072,11.3533
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,128,0.2391
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,256,0.2450
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,512,0.2837
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,2048,0.4933
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,1024,0.3534
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,4096,0.7651
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,8192,1.3128
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,16384,2.4986
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,32768,5.2551
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,65536,10.7919
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,128,0.3717
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,256,0.3796
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,512,0.4548
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,1024,0.5990
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,2048,0.8727
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,4096,1.4182
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,16384,5.1833
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,8192,2.5171
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,128,0.0872
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,32768,10.5719
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,256,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,512,0.0884
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,1024,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,2048,0.0884
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,4096,0.0933
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,8192,0.0955
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,16384,0.0995
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,32768,0.1064
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,65536,0.1219
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,131072,0.1394
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,128,0.0893
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,256,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,512,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,1024,0.0894
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,2048,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,4096,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,8192,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,16384,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,32768,0.1150
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,65536,0.1416
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,131072,0.1698
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,128,0.0958
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,256,0.0932
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,512,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,1024,0.0920
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,2048,0.0970
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,4096,0.1037
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,8192,0.0993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,16384,0.1146
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,32768,0.1410
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,65536,0.1650
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,131072,0.2173
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,128,0.0994
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,512,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,1024,0.1036
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,256,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,2048,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,4096,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,8192,0.1181
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,16384,0.1428
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,32768,0.1701
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,65536,0.2254
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,131072,0.3194
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,128,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,256,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,512,0.1077
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,1024,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,2048,0.1096
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,4096,0.1201
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,8192,0.1468
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,16384,0.1722
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,32768,0.2287
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,65536,0.3293
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,131072,0.5301
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,128,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,256,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,512,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,1024,0.1155
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,2048,0.1207
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,4096,0.1521
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,8192,0.1752
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,16384,0.2297
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,32768,0.3323
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,65536,0.5447
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,131072,0.9547
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,128,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,256,0.1116
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,1024,0.1324
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,512,0.1177
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,2048,0.1553
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,4096,0.1790
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,8192,0.2343
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,16384,0.3378
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,32768,0.5484
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,65536,0.9565
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,131072,1.7855
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,128,0.1183
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,256,0.1237
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,512,0.1374
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,1024,0.1559
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,2048,0.1816
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,4096,0.2340
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,8192,0.3391
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,16384,0.5427
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,32768,0.9565
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,65536,1.7696
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,131072,3.4029
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,128,0.1421
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,256,0.1512
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,512,0.1688
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,1024,0.1970
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,2048,0.2477
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,4096,0.3486
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,8192,0.5568
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,16384,0.9873
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,32768,1.8048
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,65536,3.4420
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,131072,6.8768
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,128,0.1850
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,256,0.1929
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,512,0.2242
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,1024,0.2765
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,2048,0.3752
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,4096,0.5848
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,8192,1.0129
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,16384,1.8621
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,32768,3.5048
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,65536,6.9791
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,128,0.2636
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,256,0.2772
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,512,0.3303
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,2048,0.6215
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,1024,0.4264
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,4096,1.0191
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,8192,1.8149
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,16384,3.4668
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,32768,6.7126
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,128,0.0808
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,256,0.0835
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,512,0.0869
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,1024,0.0891
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,2048,0.0843
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,4096,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,8192,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,16384,0.0937
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,32768,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,65536,0.1186
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,131072,0.1361
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,128,0.0872
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,256,0.0889
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,512,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,1024,0.0871
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,2048,0.0912
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,4096,0.0920
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,8192,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,16384,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,32768,0.1081
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,65536,0.1397
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,131072,0.1662
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,128,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,256,0.0934
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,512,0.0953
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,1024,0.0913
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,2048,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,4096,0.0996
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,8192,0.1015
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,16384,0.1132
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,32768,0.1384
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,65536,0.1633
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,131072,0.2159
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,128,0.0991
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,256,0.0925
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,512,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,1024,0.0995
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,2048,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,4096,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,8192,0.1134
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,16384,0.1381
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,32768,0.1669
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,65536,0.2187
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,131072,0.3176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,128,0.0991
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,256,0.1034
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,512,0.1050
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,2048,0.1101
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,4096,0.1160
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,8192,0.1425
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,1024,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,16384,0.1722
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,32768,0.2204
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,65536,0.3247
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,131072,0.5308
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,128,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,256,0.1056
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,512,0.1076
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,1024,0.1092
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,4096,0.1442
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,2048,0.1201
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,8192,0.1719
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,16384,0.2194
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,32768,0.3286
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,65536,0.5397
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,131072,0.9535
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,128,0.1092
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,256,0.1092
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,512,0.1116
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,1024,0.1238
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,2048,0.1480
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,4096,0.1739
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,8192,0.2236
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,16384,0.3332
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,32768,0.5403
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,65536,0.9499
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,131072,1.7645
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,128,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,256,0.1172
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,512,0.1235
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,1024,0.1498
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,2048,0.1746
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,4096,0.2246
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,8192,0.3342
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,16384,0.5452
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,32768,0.9478
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,65536,1.7514
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,131072,3.3983
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,128,0.1260
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,256,0.1370
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,512,0.1565
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,2048,0.2334
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,1024,0.1834
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,4096,0.3384
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,8192,0.5444
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,16384,0.9732
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,32768,1.7854
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,65536,3.4228
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,131072,6.7542
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,128,0.1629
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,256,0.1706
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,512,0.2000
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,2048,0.3523
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,1024,0.2494
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,4096,0.5617
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,8192,0.9829
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,16384,1.8330
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,32768,3.5028
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,65536,6.9296
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,128,0.2197
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,256,0.2345
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,512,0.2856
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,1024,0.3803
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,2048,0.5734
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,8192,1.7677
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,4096,0.9716
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,16384,3.4221
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,32768,6.6287
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,128,0.0788
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,256,0.0788
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,512,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,1024,0.0869
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,2048,0.0872
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,4096,0.0869
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,8192,0.0888
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,16384,0.0917
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,32768,0.0954
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,65536,0.1132
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,131072,0.1337
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,128,0.0844
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,256,0.0873
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,512,0.0911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,1024,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,2048,0.0909
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,4096,0.0912
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,8192,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,32768,0.1076
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,16384,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,131072,0.1649
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,65536,0.1371
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,128,0.0922
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,2048,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,256,0.0891
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,512,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,1024,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,4096,0.0988
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,8192,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,16384,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,32768,0.1373
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,131072,0.2105
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,65536,0.1629
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,128,0.0911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,512,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,1024,0.1005
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,4096,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,8192,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,256,0.0975
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,2048,0.1039
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,16384,0.1387
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,32768,0.1628
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,131072,0.3149
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,65536,0.2162
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,256,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,512,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,128,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,1024,0.1070
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,2048,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,4096,0.1177
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,8192,0.1390
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,16384,0.1641
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,32768,0.2174
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,65536,0.3186
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,131072,0.5205
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,128,0.1024
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,256,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,512,0.1112
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,1024,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,2048,0.1183
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,4096,0.1429
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,8192,0.1648
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,16384,0.2220
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,32768,0.3276
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,65536,0.5347
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,131072,0.9458
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,128,0.1096
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,256,0.1073
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,512,0.1098
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,1024,0.1186
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,2048,0.1460
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,4096,0.1692
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,8192,0.2266
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,16384,0.3299
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,32768,0.5395
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,65536,0.9485
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,131072,1.7689
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,128,0.1111
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,256,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,512,0.1196
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,2048,0.1730
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,4096,0.2224
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,1024,0.1457
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,16384,0.5411
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,8192,0.3313
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,32768,0.9444
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,65536,1.7539
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,131072,3.3971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,128,0.1219
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,256,0.1315
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,512,0.1508
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,1024,0.1776
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,2048,0.2299
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,4096,0.3338
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,8192,0.5388
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,16384,0.9579
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,32768,1.7801
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,65536,3.4222
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,131072,6.8033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,128,0.1548
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,256,0.1623
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,512,0.1919
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,1024,0.2418
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,2048,0.3444
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,4096,0.5478
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,8192,0.9692
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,16384,1.8347
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,32768,3.4722
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,65536,6.9563
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,128,0.2017
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,256,0.2169
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,512,0.2664
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,1024,0.3606
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,2048,0.5530
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,4096,0.9531
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,8192,1.7556
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,16384,3.3986
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,32768,6.5806
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,128,0.0810
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,256,0.0829
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,512,0.0843
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,1024,0.0852
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,8192,0.0889
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,4096,0.0825
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,2048,0.0851
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,16384,0.0887
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,32768,0.0995
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,65536,0.1157
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,131072,0.1332
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,128,0.0852
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,256,0.0869
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,512,0.0889
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,1024,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,4096,0.0888
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,8192,0.0891
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,2048,0.0848
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,16384,0.0963
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,65536,0.1376
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,32768,0.1037
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,131072,0.1640
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,128,0.0893
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,256,0.0914
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,512,0.0913
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,1024,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,2048,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,4096,0.0928
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,8192,0.0947
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,16384,0.1061
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,32768,0.1345
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,65536,0.1598
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,131072,0.2104
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,128,0.0954
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,256,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,512,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,1024,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,2048,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,4096,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,8192,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,16384,0.1372
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,32768,0.1665
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,65536,0.2220
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,131072,0.3196
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,256,0.1000
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,512,0.0976
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,1024,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,2048,0.1032
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,128,0.1025
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,4096,0.1132
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,8192,0.1381
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,16384,0.1688
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,32768,0.2153
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,131072,0.5155
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,65536,0.3271
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,128,0.1000
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,256,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,512,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,1024,0.1078
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,2048,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,4096,0.1409
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,8192,0.1707
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,16384,0.2147
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,32768,0.3230
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,65536,0.5346
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,131072,0.9457
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,128,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,256,0.1069
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,512,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,1024,0.1206
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,2048,0.1445
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,4096,0.1721
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,8192,0.2241
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,32768,0.5369
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,16384,0.3263
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,65536,0.9462
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,131072,1.7715
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,128,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,256,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,512,0.1175
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,1024,0.1441
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,2048,0.1685
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,4096,0.2230
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,8192,0.3322
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,16384,0.5319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,32768,0.9433
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,65536,1.7583
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,131072,3.3860
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,128,0.1195
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,256,0.1235
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,512,0.1477
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,1024,0.1746
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,2048,0.2256
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,4096,0.3300
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,8192,0.5375
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,16384,0.9635
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,32768,1.7816
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,65536,3.4138
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,131072,6.7765
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,128,0.1483
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,512,0.1863
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,256,0.1565
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,2048,0.3381
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,1024,0.2379
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,4096,0.5428
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,8192,0.9686
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,16384,1.8163
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,32768,3.4636
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,65536,6.9128
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,128,0.1940
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,256,0.2080
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,512,0.2569
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,1024,0.3520
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,2048,0.5446
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,4096,0.9419
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,8192,1.7412
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,16384,3.3600
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,32768,6.5929
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,128,0.0770
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,256,0.0787
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,512,0.0829
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,1024,0.0850
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,2048,0.0848
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,8192,0.0809
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,4096,0.0792
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,16384,0.0881
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,32768,0.0953
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,65536,0.1121
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,131072,0.1321
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,128,0.0828
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,256,0.0832
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,512,0.0871
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,2048,0.0829
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,4096,0.0829
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,1024,0.0870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,8192,0.0906
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,16384,0.0954
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,32768,0.1061
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,65536,0.1360
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,131072,0.1635
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,128,0.0862
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,512,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,256,0.0870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,1024,0.0856
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,2048,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,8192,0.0953
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,4096,0.0916
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,16384,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,32768,0.1313
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,65536,0.1537
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,131072,0.2080
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,128,0.0876
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,256,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,512,0.0954
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,2048,0.0999
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,4096,0.0991
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,1024,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,8192,0.1036
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,16384,0.1339
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,32768,0.1583
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,65536,0.2152
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,131072,0.3160
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,128,0.0995
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,256,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,512,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,2048,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,4096,0.1132
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,8192,0.1340
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,1024,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,32768,0.2144
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,65536,0.3158
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,16384,0.1660
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,131072,0.5192
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,128,0.0999
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,256,0.1014
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,512,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,1024,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,2048,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,4096,0.1407
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,8192,0.1602
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,16384,0.2161
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,32768,0.3243
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,65536,0.5303
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,131072,0.9413
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,128,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,256,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,512,0.1055
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,1024,0.1096
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,2048,0.1424
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,4096,0.1681
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,8192,0.2159
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,16384,0.3234
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,32768,0.5307
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,65536,0.9454
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,131072,1.7620
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,128,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,256,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,512,0.1148
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,1024,0.1418
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,2048,0.1642
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,4096,0.2200
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,8192,0.3242
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,16384,0.5368
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,32768,0.9365
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,65536,1.7526
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,131072,3.4058
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,128,0.1174
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,256,0.1218
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,512,0.1456
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,1024,0.1695
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,2048,0.2254
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,4096,0.3262
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,8192,0.5342
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,16384,0.9602
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,32768,1.7800
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,65536,3.4121
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,131072,6.7898
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,128,0.1448
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,512,0.1801
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,256,0.1521
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,1024,0.2338
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,2048,0.3351
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,4096,0.5393
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,8192,0.9645
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,16384,1.8139
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,32768,3.4653
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,65536,6.8547
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,128,0.1878
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,256,0.2016
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,512,0.2507
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,1024,0.3447
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,2048,0.5394
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,4096,0.9387
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,8192,1.7312
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,16384,3.3452
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,32768,6.5453
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,128,0.1205
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,256,0.1168
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,512,0.1346
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,2048,0.1218
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,4096,0.1264
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,1024,0.1252
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,16384,0.1370
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,8192,0.1468
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,32768,0.1416
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,65536,0.1422
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,131072,0.1503
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,128,0.1235
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,256,0.1257
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,512,0.1276
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,1024,0.1298
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,2048,0.1306
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,4096,0.1340
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,8192,0.1384
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,16384,0.1423
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,32768,0.1446
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,65536,0.1528
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,131072,0.1648
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,128,0.1299
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,256,0.1278
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,1024,0.1346
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,512,0.1302
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,2048,0.1367
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,4096,0.1412
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,8192,0.1409
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,16384,0.1463
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,32768,0.1544
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,65536,0.1656
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,131072,0.1878
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,128,0.1340
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,256,0.1360
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,512,0.1360
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,1024,0.1402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,2048,0.1468
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,4096,0.1440
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,8192,0.1507
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,16384,0.1571
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,32768,0.1695
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,65536,0.1941
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,131072,0.2429
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,128,0.1471
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,256,0.1483
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,512,0.1460
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,1024,0.1512
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,2048,0.1546
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,4096,0.1558
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,8192,0.1648
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,16384,0.1796
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,32768,0.2077
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,65536,0.2628
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,131072,0.3810
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,128,0.1565
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,512,0.1660
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,1024,0.1658
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,2048,0.1688
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,4096,0.1754
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,256,0.1583
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,8192,0.1914
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,16384,0.2201
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,32768,0.2742
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,65536,0.3921
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,131072,0.6417
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,128,0.1771
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,256,0.1792
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,512,0.1815
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,2048,0.1914
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,1024,0.1849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,4096,0.2059
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,8192,0.2337
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,16384,0.2898
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,32768,0.4072
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,65536,0.6654
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,131072,1.1738
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,128,0.2265
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,256,0.2267
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,512,0.2316
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,1024,0.2384
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,2048,0.2530
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,4096,0.2813
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,8192,0.3369
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,16384,0.4573
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,32768,0.7268
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,65536,1.2296
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,131072,2.3120
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,128,0.3198
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,256,0.3223
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,512,0.3310
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,1024,0.3431
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,2048,0.3747
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,4096,0.4308
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,8192,0.5546
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,16384,0.8268
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,65536,2.4836
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,32768,1.3542
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,131072,5.0678
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,128,0.5226
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,256,0.5271
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,512,0.5431
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,1024,0.5696
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,2048,0.6264
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,4096,0.7378
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,16384,1.5198
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,8192,0.9799
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,32768,2.4844
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,65536,4.3945
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,128,0.9425
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,256,0.9507
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,512,0.9772
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,2048,1.1488
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,1024,1.0337
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,4096,1.3840
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,16384,2.9641
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,8192,1.9303
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,32768,5.0784
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,128,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,256,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,512,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,1024,0.0994
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,2048,0.1003
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,4096,0.0997
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,8192,0.1077
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,16384,0.1111
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,32768,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,65536,0.1288
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,131072,0.1363
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,128,0.1027
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,256,0.1023
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,512,0.1032
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,1024,0.1036
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,4096,0.1084
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,2048,0.1063
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,8192,0.1160
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,16384,0.1208
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,32768,0.1337
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,65536,0.1428
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,131072,0.1541
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,128,0.1082
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,256,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,512,0.1055
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,1024,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,2048,0.1119
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,4096,0.1158
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,8192,0.1236
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,16384,0.1305
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,32768,0.1377
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,65536,0.1513
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,131072,0.1732
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,128,0.1124
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,512,0.1111
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,256,0.1118
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,2048,0.1159
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,1024,0.1120
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,4096,0.1238
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,8192,0.1319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,16384,0.1386
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,32768,0.1524
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,65536,0.1766
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,131072,0.2209
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,128,0.1170
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,256,0.1199
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,1024,0.1238
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,512,0.1201
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,2048,0.1313
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,4096,0.1378
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,8192,0.1446
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,16384,0.1567
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,32768,0.1790
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,65536,0.2280
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,131072,0.3193
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,128,0.1299
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,256,0.1257
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,512,0.1310
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,1024,0.1360
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,2048,0.1440
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,4096,0.1496
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,8192,0.1637
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,16384,0.1869
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,32768,0.2402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,65536,0.3372
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,131072,0.5462
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,128,0.1402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,512,0.1501
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,1024,0.1554
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,2048,0.1633
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,4096,0.1752
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,256,0.1402
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,8192,0.2011
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,16384,0.2513
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,32768,0.3501
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,65536,0.5554
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,131072,0.9885
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,128,0.1671
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,256,0.1682
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,512,0.1730
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,1024,0.1802
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,2048,0.1919
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,4096,0.2182
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,8192,0.2680
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,16384,0.3679
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,32768,0.5752
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,65536,0.9923
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,131072,1.8926
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,128,0.2150
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,256,0.2186
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,512,0.2251
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,1024,0.2400
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,2048,0.2685
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,4096,0.3182
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,8192,0.4208
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,16384,0.6312
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,32768,1.0542
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,65536,1.9792
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,131072,3.6585
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,128,0.3219
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,256,0.3278
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,512,0.3418
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,1024,0.3667
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,2048,0.4221
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,4096,0.5247
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,8192,0.7398
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,16384,1.1733
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,32768,2.0902
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,65536,3.8578
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,128,0.5364
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,256,0.5519
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,512,0.5758
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,1024,0.6265
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,2048,0.7259
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,4096,0.9208
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,8192,1.3201
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,16384,2.1338
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,32768,3.8543
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,128,0.0894
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,256,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,512,0.0929
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,1024,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,2048,0.0915
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,8192,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,4096,0.0950
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,16384,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,32768,0.1036
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,65536,0.1133
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,131072,0.1299
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,128,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,256,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,1024,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,2048,0.0987
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,512,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,4096,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,8192,0.0953
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,16384,0.1015
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,32768,0.1142
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,65536,0.1315
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,131072,0.1405
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,128,0.0990
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,256,0.0916
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,512,0.0975
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,1024,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,2048,0.1011
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,4096,0.0993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,8192,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,16384,0.1119
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,32768,0.1322
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,65536,0.1436
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,131072,0.1780
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,128,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,256,0.1035
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,512,0.1046
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,1024,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,2048,0.1021
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,4096,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,8192,0.1200
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,16384,0.1354
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,32768,0.1463
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,131072,0.2448
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,65536,0.1814
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,256,0.1096
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,512,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,1024,0.1082
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,128,0.1072
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,2048,0.1134
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,4096,0.1197
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,8192,0.1370
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,16384,0.1525
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,32768,0.1907
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,65536,0.2612
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,131072,0.4004
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,128,0.1137
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,256,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,512,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,2048,0.1263
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,1024,0.1165
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,4096,0.1426
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,8192,0.1573
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,16384,0.1951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,32768,0.2662
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,65536,0.4077
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,131072,0.6882
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,256,0.1198
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,512,0.1237
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,1024,0.1317
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,2048,0.1452
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,128,0.1184
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,4096,0.1618
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,8192,0.1981
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,16384,0.2689
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,32768,0.4133
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,65536,0.6906
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,131072,1.2467
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,128,0.1373
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,256,0.1388
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,512,0.1466
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,1024,0.1565
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,2048,0.1754
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,4096,0.2137
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,8192,0.2828
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,16384,0.4254
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,32768,0.7142
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,65536,1.2807
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,131072,2.5136
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,128,0.1713
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,256,0.1729
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,512,0.1828
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,1024,0.2003
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,2048,0.2373
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,4096,0.3084
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,8192,0.4483
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,16384,0.7349
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,32768,1.3026
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,65536,2.4852
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,131072,5.2033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,128,0.2379
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,256,0.2370
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,512,0.2518
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,1024,0.2858
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,2048,0.3520
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,4096,0.4793
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,8192,0.7385
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,16384,1.2447
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,32768,2.3157
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,65536,4.7776
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,128,0.3726
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,256,0.3730
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,512,0.4049
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,1024,0.4706
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,2048,0.6003
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,4096,0.8577
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,16384,2.4031
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,8192,1.3792
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,32768,4.6666
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,128,0.0867
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,256,0.0873
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,512,0.0830
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,1024,0.0909
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,2048,0.0906
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,4096,0.0909
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,8192,0.0924
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,16384,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,32768,0.1011
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,65536,0.1076
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,131072,0.1160
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,128,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,256,0.0871
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,512,0.0920
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,1024,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,2048,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,4096,0.0934
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,8192,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,16384,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,32768,0.1072
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,65536,0.1174
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,131072,0.1333
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,128,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,256,0.0932
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,512,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,1024,0.0950
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,2048,0.0950
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,4096,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,8192,0.0967
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,16384,0.1045
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,32768,0.1166
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,65536,0.1296
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,131072,0.1571
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,128,0.0934
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,256,0.1009
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,512,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,1024,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,2048,0.1060
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,4096,0.1014
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,8192,0.1079
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,16384,0.1175
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,32768,0.1320
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,65536,0.1615
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,131072,0.2022
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,128,0.1052
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,512,0.1055
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,256,0.1056
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,1024,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,2048,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,4096,0.1118
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,8192,0.1220
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,16384,0.1369
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,32768,0.1627
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,65536,0.2061
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,131072,0.2905
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,128,0.1097
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,256,0.1067
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,512,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,1024,0.1116
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,2048,0.1142
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,4096,0.1226
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,8192,0.1382
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,16384,0.1647
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,32768,0.2105
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,65536,0.2979
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,131072,0.4816
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,128,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,256,0.1116
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,512,0.1134
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,1024,0.1159
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,2048,0.1279
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,4096,0.1445
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,8192,0.1690
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,16384,0.2139
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,32768,0.3050
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,65536,0.4840
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,131072,0.8417
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,128,0.1199
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,256,0.1198
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,512,0.1239
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,1024,0.1338
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,2048,0.1471
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,4096,0.1736
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,8192,0.2189
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,16384,0.3077
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,32768,0.4911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,65536,0.8451
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,131072,1.5689
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,128,0.1362
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,256,0.1380
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,512,0.1523
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,1024,0.1638
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,2048,0.1870
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,4096,0.2343
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,8192,0.3237
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,16384,0.5088
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,32768,0.8721
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,65536,1.5896
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,131072,3.0635
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,128,0.1770
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,256,0.1794
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,512,0.1903
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,2048,0.2648
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,1024,0.2158
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,8192,0.5462
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,4096,0.3565
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,16384,0.9168
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,32768,1.6450
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,65536,3.1389
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,128,0.2539
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,256,0.2557
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,512,0.2806
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,2048,0.4152
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,4096,0.5936
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,1024,0.3261
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,8192,0.9430
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,16384,1.6285
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,32768,3.0179
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,128,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,256,0.0787
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,512,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,1024,0.0812
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,2048,0.0871
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,8192,0.0903
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,4096,0.0889
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,16384,0.0909
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,32768,0.0982
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,131072,0.1129
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,65536,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,128,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,256,0.0850
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,512,0.0867
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,1024,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,2048,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,4096,0.0908
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,16384,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,8192,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,32768,0.1029
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,65536,0.1113
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,131072,0.1283
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,128,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,256,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,512,0.0911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,1024,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,4096,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,2048,0.0911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,8192,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,16384,0.1032
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,32768,0.1117
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,65536,0.1280
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,131072,0.1495
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,128,0.0914
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,256,0.0916
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,512,0.0975
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,1024,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,2048,0.0991
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,4096,0.1034
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,16384,0.1123
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,8192,0.1039
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,32768,0.1285
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,65536,0.1511
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,131072,0.1962
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,128,0.0989
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,256,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,512,0.1041
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,1024,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,2048,0.1065
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,4096,0.1078
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,8192,0.1163
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,16384,0.1320
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,32768,0.1547
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,65536,0.2004
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,131072,0.2861
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,128,0.1037
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,256,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,512,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,1024,0.1097
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,2048,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,16384,0.1565
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,4096,0.1176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,8192,0.1339
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,32768,0.2036
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,65536,0.2953
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,131072,0.4739
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,128,0.1098
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,256,0.1075
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,512,0.1114
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,1024,0.1118
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,2048,0.1177
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,4096,0.1393
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,8192,0.1606
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,16384,0.2083
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,32768,0.2985
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,65536,0.4773
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,131072,0.8355
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,128,0.1136
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,256,0.1135
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,512,0.1153
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,1024,0.1222
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,2048,0.1421
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,4096,0.1655
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,8192,0.2084
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,16384,0.3008
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,32768,0.4801
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,65536,0.8372
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,131072,1.5491
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,128,0.1243
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,256,0.1237
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,512,0.1346
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,1024,0.1484
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,4096,0.2173
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,8192,0.3112
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,2048,0.1731
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,16384,0.4919
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,32768,0.8519
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,65536,1.5702
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,131072,3.0456
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,128,0.1485
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,256,0.1549
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,512,0.1655
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,2048,0.2385
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,1024,0.1911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,4096,0.3298
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,8192,0.5167
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,16384,0.8816
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,32768,1.6128
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,65536,3.0847
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,128,0.2017
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,256,0.2064
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,512,0.2284
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,2048,0.3638
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,1024,0.2740
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,4096,0.5403
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,16384,1.5700
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,8192,0.8854
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,32768,2.9433
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,128,0.0828
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,256,0.0815
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,512,0.0810
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,1024,0.0809
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,2048,0.0808
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,4096,0.0869
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,8192,0.0868
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,16384,0.0891
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,32768,0.0951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,65536,0.0996
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,131072,0.1097
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,128,0.0871
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,256,0.0892
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,512,0.0860
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,1024,0.0867
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,2048,0.0898
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,4096,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,8192,0.0937
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,16384,0.0954
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,32768,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,65536,0.1137
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,131072,0.1296
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,128,0.0916
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,256,0.0891
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,512,0.0892
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,1024,0.0935
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,2048,0.0911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,4096,0.0958
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,8192,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,16384,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,32768,0.1089
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,65536,0.1265
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,131072,0.1503
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,128,0.0912
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,256,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,512,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,1024,0.0978
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,2048,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,4096,0.0999
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,8192,0.1035
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,16384,0.1144
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,32768,0.1276
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,65536,0.1514
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,131072,0.1955
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,128,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,256,0.0991
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,512,0.1014
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,1024,0.1058
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,2048,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,4096,0.1061
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,8192,0.1128
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,16384,0.1286
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,32768,0.1507
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,65536,0.1972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,131072,0.2858
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,128,0.1037
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,256,0.1053
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,512,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,1024,0.1085
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,2048,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,4096,0.1155
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,8192,0.1319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,16384,0.1551
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,32768,0.2029
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,131072,0.4714
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,65536,0.2930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,128,0.1070
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,256,0.1078
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,512,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,1024,0.1111
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,4096,0.1351
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,2048,0.1176
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,8192,0.1582
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,16384,0.2068
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,32768,0.2951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,65536,0.4751
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,131072,0.8337
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,128,0.1117
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,256,0.1103
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,512,0.1137
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,1024,0.1195
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,2048,0.1368
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,4096,0.1607
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,8192,0.2057
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,16384,0.2961
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,32768,0.4763
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,65536,0.8340
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,131072,1.5460
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,256,0.1184
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,512,0.1262
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,128,0.1186
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,1024,0.1429
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,2048,0.1675
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,4096,0.2128
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,8192,0.3040
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,16384,0.4845
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,32768,0.8433
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,65536,1.5623
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,131072,3.0114
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,128,0.1367
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,256,0.1446
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,512,0.1562
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,1024,0.1784
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,2048,0.2279
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,4096,0.3200
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,8192,0.5040
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,32768,1.5978
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,16384,0.8682
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,65536,3.0658
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,128,0.1821
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,256,0.1863
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,512,0.2094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,1024,0.2539
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,2048,0.3409
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,4096,0.5166
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,8192,0.8579
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,16384,1.5412
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,32768,2.9097
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,128,0.0787
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,256,0.0770
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,512,0.0789
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,2048,0.0811
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,4096,0.0828
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,1024,0.0789
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,8192,0.0868
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,16384,0.0836
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,32768,0.0898
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,65536,0.0967
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,131072,0.1091
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,128,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,256,0.0851
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,512,0.0873
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,1024,0.0881
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,2048,0.0837
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,4096,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,16384,0.0894
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,8192,0.0893
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,32768,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,65536,0.1064
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,131072,0.1297
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,256,0.0852
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,128,0.0911
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,512,0.0927
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,1024,0.0910
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,2048,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,4096,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,16384,0.0975
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,8192,0.0950
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,32768,0.1076
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,65536,0.1241
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,131072,0.1496
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,128,0.0907
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,256,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,512,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,1024,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,2048,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,4096,0.0992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,8192,0.1014
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,16384,0.1096
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,32768,0.1267
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,65536,0.1525
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,131072,0.1958
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,128,0.0973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,512,0.0972
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,256,0.1012
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,1024,0.0995
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,2048,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,4096,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,8192,0.1137
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,16384,0.1277
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,32768,0.1529
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,65536,0.1954
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,131072,0.2836
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,256,0.1052
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,128,0.0994
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,512,0.0997
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,1024,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,2048,0.1087
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,4096,0.1114
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,8192,0.1302
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,16384,0.1522
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,32768,0.1993
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,65536,0.2934
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,131072,0.4675
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,128,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,256,0.1057
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,1024,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,2048,0.1117
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,512,0.1055
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,4096,0.1342
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,8192,0.1576
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,16384,0.2013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,32768,0.2957
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,65536,0.4738
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,131072,0.8307
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,128,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,256,0.1077
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,512,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,1024,0.1139
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,2048,0.1363
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,4096,0.1600
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,8192,0.2034
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,16384,0.2929
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,32768,0.4742
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,65536,0.8303
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,131072,1.5417
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,128,0.1147
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,256,0.1159
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,512,0.1217
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,1024,0.1399
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,2048,0.1642
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,4096,0.2099
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,8192,0.3001
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,16384,0.4812
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,32768,0.8403
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,65536,1.5591
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,131072,2.9961
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,128,0.1319
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,256,0.1381
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,512,0.1515
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,2048,0.2217
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,1024,0.1743
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,4096,0.3143
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,8192,0.4969
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,16384,0.8625
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,32768,1.5896
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,65536,3.0620
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,128,0.1711
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,256,0.1775
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,512,0.1998
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,2048,0.3302
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,1024,0.2427
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,4096,0.5042
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,8192,0.8450
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,16384,1.5363
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,32768,2.8951
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,128,0.0849
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,256,0.0851
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,1024,0.0890
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,2048,0.0913
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,512,0.0888
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,4096,0.0850
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,8192,0.0936
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,16384,0.0906
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,32768,0.0981
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,65536,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,131072,0.1170
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,128,0.0913
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,256,0.0909
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,512,0.0920
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,1024,0.0961
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,4096,0.0930
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,2048,0.0881
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,8192,0.0955
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,16384,0.0955
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,32768,0.1055
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,65536,0.1116
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,131072,0.1353
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,128,0.0931
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,256,0.0935
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,2048,0.0912
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,1024,0.0971
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,512,0.0952
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,4096,0.0975
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,8192,0.0996
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,16384,0.1009
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,32768,0.1097
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,65536,0.1322
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,131072,0.1495
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,128,0.0964
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,256,0.1033
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,512,0.1013
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,1024,0.1034
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,2048,0.0994
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,4096,0.1073
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,8192,0.1077
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,16384,0.1128
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,32768,0.1328
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,65536,0.1582
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,131072,0.2014
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,128,0.1073
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,256,0.1057
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,512,0.1073
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,1024,0.1055
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,4096,0.1137
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,2048,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,8192,0.1164
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,16384,0.1359
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,32768,0.1629
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,65536,0.2042
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,131072,0.2887
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,128,0.1061
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,512,0.1074
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,256,0.1054
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,1024,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,2048,0.1117
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,4096,0.1184
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,8192,0.1341
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,16384,0.1598
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,32768,0.2096
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,65536,0.2950
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,131072,0.4741
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,128,0.1095
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,256,0.1094
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,512,0.1116
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,1024,0.1115
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,2048,0.1207
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,4096,0.1383
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,16384,0.2093
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,8192,0.1648
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,32768,0.3000
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,65536,0.4776
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,131072,0.8363
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,128,0.1116
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,256,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,512,0.1156
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,1024,0.1263
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,2048,0.1434
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,4096,0.1668
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,8192,0.2097
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,16384,0.3001
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,32768,0.4765
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,65536,0.8353
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,131072,1.5495
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,128,0.1199
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,256,0.1220
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,512,0.1298
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,1024,0.1475
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,2048,0.1713
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,4096,0.2160
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,8192,0.3055
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,16384,0.4864
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,32768,0.8465
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,65536,1.5655
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,131072,2.9973
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,256,0.1401
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,128,0.1358
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,512,0.1562
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,1024,0.1794
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,2048,0.2255
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,4096,0.3163
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,8192,0.4992
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,16384,0.8646
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,32768,1.5919
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,65536,3.0891
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,128,0.1746
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,256,0.1792
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,512,0.2035
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,2048,0.3317
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,1024,0.2467
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,4096,0.5031
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,8192,0.8449
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,16384,1.5250
VLLM,0.16.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,32768,2.8900
