framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,256,0.1349
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,128,0.1337
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,512,0.1376
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,1024,0.1417
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,2048,0.1437
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,8192,0.1481
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,4096,0.1476
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,16384,0.1665
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,32768,0.1530
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,65536,0.1649
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1,1,131072,0.1760
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,128,0.1430
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,512,0.1440
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,256,0.1445
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,1024,0.1439
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,2048,0.1475
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,4096,0.1535
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,8192,0.1627
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,16384,0.1575
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,32768,0.1672
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,65536,0.1750
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1,1,131072,0.1998
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,128,0.1423
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,512,0.1479
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,256,0.1476
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,1024,0.1511
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,2048,0.1604
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,4096,0.1671
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,8192,0.1565
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,16384,0.1621
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,32768,0.1770
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,65536,0.2047
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1,1,131072,0.2516
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,128,0.1517
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,512,0.1563
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,1024,0.1643
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,256,0.1532
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,4096,0.1610
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,16384,0.1792
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,8192,0.1676
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,2048,0.1710
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,32768,0.2049
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,65536,0.2552
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1,1,131072,0.3519
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,128,0.1629
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,256,0.1667
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,512,0.1707
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,1024,0.1755
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,2048,0.1708
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,4096,0.1796
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,8192,0.1911
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,16384,0.2182
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,32768,0.2688
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,65536,0.3772
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1,1,131072,0.5881
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,128,0.1743
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,256,0.1722
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,512,0.1836
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,2048,0.1910
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,1024,0.1848
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,4096,0.2080
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,16384,0.2841
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,8192,0.2337
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,32768,0.3909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,65536,0.6032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1,1,131072,1.0230
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,128,0.1981
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,512,0.2051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,256,0.2002
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,1024,0.2074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,2048,0.2247
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,4096,0.2484
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,8192,0.3021
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,16384,0.4080
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,32768,0.6186
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,65536,1.0376
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1,1,131072,1.8728
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,128,0.2422
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,256,0.2413
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,512,0.2517
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,1024,0.2675
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,2048,0.2921
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,4096,0.3477
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,8192,0.4517
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,16384,0.6641
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,32768,1.0839
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,65536,1.9253
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1,1,131072,3.6000
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,128,0.3238
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,256,0.3305
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,512,0.3453
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,1024,0.3760
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,2048,0.4293
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,4096,0.5365
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,8192,0.7478
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,16384,1.1700
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,32768,2.0158
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,65536,3.7107
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,1,1,131072,7.1986
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,128,0.5325
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,256,0.5472
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,512,0.5725
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,1024,0.6235
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,2048,0.7243
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,4096,0.9187
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,8192,1.3029
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,16384,2.0759
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,32768,3.6252
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,512,1,1,65536,6.8482
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,128,0.9490
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,256,0.9680
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,512,1.0188
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,2048,1.3203
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,4096,1.7055
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,1024,1.1214
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,8192,2.4724
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,128,0.1136
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,256,0.1039
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,512,0.1137
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,1024,0.1120
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,16384,4.0258
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1024,1,1,32768,7.1969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,2048,0.1107
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,4096,0.1192
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,16384,0.1294
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,8192,0.1169
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,32768,0.1450
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,65536,0.1363
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1,1,131072,0.1504
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,128,0.1153
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,256,0.1142
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,2048,0.1150
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,512,0.1153
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,4096,0.1216
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,8192,0.1264
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,16384,0.1358
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,1024,0.1208
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,32768,0.1488
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,65536,0.1617
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1,1,131072,0.1876
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,256,0.1175
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,1024,0.1186
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,512,0.1168
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,128,0.1140
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,2048,0.1262
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,4096,0.1329
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,8192,0.1376
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,32768,0.1535
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,65536,0.1824
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,131072,0.2344
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1,1,16384,0.1439
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,512,0.1240
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,128,0.1175
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,256,0.1226
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,1024,0.1279
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,2048,0.1345
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,16384,0.1555
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,8192,0.1419
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,4096,0.1427
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,32768,0.1877
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,65536,0.2360
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1,1,131072,0.3448
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,128,0.1333
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,256,0.1299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,512,0.1340
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,1024,0.1444
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,4096,0.1488
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,2048,0.1514
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,8192,0.1639
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,16384,0.1913
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,32768,0.2466
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,65536,0.3467
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1,1,131072,0.5616
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,128,0.1426
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,256,0.1431
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,512,0.1461
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,1024,0.1555
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,2048,0.1567
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,8192,0.1970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,32768,0.3574
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,16384,0.2526
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,4096,0.1696
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,65536,0.5748
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1,1,131072,0.9859
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,128,0.1455
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,256,0.1487
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,512,0.1607
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,2048,0.1835
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,4096,0.2094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,1024,0.1661
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,8192,0.2634
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,16384,0.3679
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,32768,0.5858
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,65536,0.9993
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1,1,131072,1.8288
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,128,0.1719
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,256,0.1761
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,512,0.1846
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,2048,0.2256
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,1024,0.2004
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,4096,0.2776
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,8192,0.3833
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,16384,0.6006
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,32768,1.0157
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,65536,1.8455
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1,1,131072,3.5157
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,128,0.2201
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,256,0.2261
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,512,0.2439
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,1024,0.2753
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,2048,0.3279
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,4096,0.4312
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,8192,0.6425
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,16384,1.0870
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,32768,1.9296
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,65536,3.6242
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,128,0.3290
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,256,0.3460
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,512,0.3742
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,1,1,131072,7.0235
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,2048,0.5385
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,1024,0.4274
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,4096,0.7551
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,8192,1.1978
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,16384,2.0819
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,32768,3.8098
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,512,1,1,65536,7.2785
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,128,0.5608
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,256,0.5849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,512,0.6394
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,2048,0.9400
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,4096,1.3510
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,1024,0.7411
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,8192,2.1809
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,16384,3.8769
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,128,0.0911
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,256,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,512,0.1030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,1024,0.0928
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1024,1,1,32768,7.0784
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,2048,0.1014
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,4096,0.1104
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,8192,0.0965
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,16384,0.1090
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,32768,0.1156
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,65536,0.1232
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1,1,131072,0.1545
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,128,0.0946
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,256,0.0950
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,512,0.1014
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,1024,0.0964
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,2048,0.0967
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,4096,0.1017
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,8192,0.1077
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,16384,0.1154
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,32768,0.1287
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,65536,0.1440
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1,1,131072,0.1805
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,128,0.0987
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,256,0.0988
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,512,0.0967
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,1024,0.1016
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,2048,0.1072
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,4096,0.1068
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,8192,0.1140
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,16384,0.1256
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,32768,0.1455
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,65536,0.1828
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1,1,131072,0.2502
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,128,0.1011
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,256,0.1011
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,512,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,2048,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,1024,0.1106
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,4096,0.1211
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,8192,0.1284
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,16384,0.1480
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,32768,0.1844
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,65536,0.2572
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1,1,131072,0.3960
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,128,0.1108
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,256,0.1101
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,512,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,1024,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,2048,0.1258
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,4096,0.1378
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,8192,0.1537
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,16384,0.1920
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,32768,0.2687
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,65536,0.4242
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1,1,131072,0.7388
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,128,0.1190
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,256,0.1180
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,512,0.1236
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,1024,0.1326
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,2048,0.1421
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,4096,0.1599
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,8192,0.2018
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,16384,0.2741
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,32768,0.4340
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,65536,0.7441
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1,1,131072,1.3623
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,128,0.1298
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,256,0.1282
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,512,0.1359
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,2048,0.1644
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,1024,0.1458
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,4096,0.2037
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,8192,0.2810
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,16384,0.4361
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,32768,0.7363
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,65536,1.3516
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1,1,131072,2.5627
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,128,0.1495
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,256,0.1461
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,512,0.1607
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,1024,0.1798
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,2048,0.2205
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,4096,0.2951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,8192,0.4483
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,16384,0.7492
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,32768,1.4064
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,65536,2.6871
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1,1,131072,5.1727
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,128,0.1800
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,256,0.1854
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,512,0.2060
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,1024,0.2448
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,2048,0.3224
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,4096,0.4701
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,16384,1.4126
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,8192,0.7726
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,32768,2.6925
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,65536,5.3702
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,128,0.2500
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,256,0.2551
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,512,0.2942
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,2048,0.5059
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,1024,0.3646
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,1,1,131072,10.8825
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,4096,0.7796
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,8192,1.3235
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,16384,2.4883
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,32768,4.9714
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,128,0.3953
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,256,0.4027
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,512,1,1,65536,10.1311
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,512,0.4798
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,2048,0.9017
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,1024,0.6214
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,4096,1.4351
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,8192,2.5294
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,16384,4.9248
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,128,0.0815
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,512,0.0790
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,1024,0.0849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,2048,0.0820
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,4096,0.0824
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,256,0.0806
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,8192,0.0848
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,16384,0.0964
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,32768,0.1070
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,65536,0.1187
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1,1,131072,0.1275
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,128,0.0811
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,512,0.0815
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,256,0.0806
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1024,1,1,32768,10.1239
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,1024,0.0811
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,2048,0.0868
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,4096,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,8192,0.0965
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,16384,0.1041
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,32768,0.1183
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,65536,0.1337
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1,1,131072,0.1613
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,128,0.0825
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,256,0.0806
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,512,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,1024,0.0849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,2048,0.0892
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,4096,0.0962
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,8192,0.1026
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,16384,0.1168
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,65536,0.1535
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,32768,0.1347
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1,1,131072,0.2077
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,128,0.0847
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,256,0.0914
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,512,0.0899
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,1024,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,2048,0.0978
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,8192,0.1182
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,16384,0.1344
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,32768,0.1620
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,65536,0.2181
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,131072,0.3115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1,1,4096,0.1111
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,128,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,256,0.0934
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,512,0.0952
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,1024,0.1040
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,2048,0.1096
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,4096,0.1221
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,8192,0.1377
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,16384,0.1607
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,32768,0.2144
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,65536,0.3182
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1,1,131072,0.5176
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,128,0.1009
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,256,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,512,0.1064
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,1024,0.1170
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,2048,0.1274
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,4096,0.1419
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,8192,0.1682
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,16384,0.2197
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,32768,0.3271
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,65536,0.5348
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1,1,131072,0.9474
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,128,0.1134
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,256,0.1147
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,512,0.1220
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,1024,0.1345
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,4096,0.1744
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,2048,0.1487
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,8192,0.2271
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,16384,0.3336
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,32768,0.5404
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,65536,0.9511
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1,1,131072,1.7716
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,128,0.1240
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,256,0.1276
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,512,0.1369
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,1024,0.1494
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,4096,0.2307
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,8192,0.3326
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,16384,0.5429
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,2048,0.1767
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,32768,0.9498
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,65536,1.7663
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1,1,131072,3.4035
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,128,0.1463
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,256,0.1511
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,512,0.1649
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,2048,0.2447
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,1024,0.1954
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,4096,0.3473
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,8192,0.5590
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,16384,0.9769
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,32768,1.7941
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,65536,3.4387
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,128,0.1884
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,256,0.1970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,1,1,131072,6.7637
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,512,0.2279
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,2048,0.3792
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,4096,0.5918
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,1024,0.2765
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,8192,1.0128
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,16384,1.8648
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,32768,3.5167
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,512,1,1,65536,6.8473
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,128,0.2694
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,256,0.2853
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,512,0.3375
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,1024,0.4356
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,2048,0.6302
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,4096,1.0283
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,16384,3.4464
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,128,0.0724
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,256,0.0687
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,8192,1.8249
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,512,0.0704
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,1024,0.0725
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,2048,0.0789
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,4096,0.0745
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,8192,0.0829
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,16384,0.0847
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,32768,0.0935
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,65536,0.1065
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1,1,131072,0.1250
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1024,1,1,32768,6.6309
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,128,0.0744
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,256,0.0725
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,512,0.0809
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,1024,0.0765
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,2048,0.0764
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,4096,0.0806
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,8192,0.0862
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,16384,0.0932
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,32768,0.1105
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,65536,0.1298
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1,1,131072,0.1504
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,128,0.0825
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,256,0.0744
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,512,0.0766
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,1024,0.0787
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,2048,0.0767
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,4096,0.0886
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,8192,0.0951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,16384,0.1124
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,32768,0.1237
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,65536,0.1464
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1,1,131072,0.1984
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,128,0.0788
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,256,0.0808
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,512,0.0828
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,1024,0.0832
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,2048,0.0932
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,4096,0.0955
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,8192,0.1097
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,16384,0.1273
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,32768,0.1503
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,65536,0.2020
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1,1,131072,0.3064
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,128,0.0935
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,256,0.0889
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,512,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,1024,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,2048,0.1031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,4096,0.1153
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,8192,0.1297
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,16384,0.1564
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,32768,0.2181
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,65536,0.3107
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1,1,131072,0.5149
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,128,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,256,0.0972
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,512,0.0972
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,2048,0.1195
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,1024,0.1094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,4096,0.1364
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,16384,0.2124
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,8192,0.1594
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,32768,0.3158
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,65536,0.5306
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1,1,131072,0.9413
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,128,0.1026
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,256,0.1028
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,512,0.1131
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,2048,0.1418
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,1024,0.1248
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,4096,0.1638
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,8192,0.2163
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,16384,0.3224
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,32768,0.5326
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,65536,0.9413
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1,1,131072,1.7588
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,128,0.1136
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,256,0.1126
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,512,0.1259
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,1024,0.1418
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,2048,0.1688
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,4096,0.2209
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,8192,0.3281
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,16384,0.5353
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,32768,0.9355
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,65536,1.7451
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1,1,131072,3.3689
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,128,0.1323
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,256,0.1360
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,512,0.1539
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,1024,0.1804
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,2048,0.2339
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,4096,0.3347
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,8192,0.5453
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,16384,0.9664
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,32768,1.7775
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,65536,3.4210
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,128,0.1645
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,256,0.1699
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,512,0.1976
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,1,1,131072,6.7006
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,2048,0.3507
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,1024,0.2506
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,4096,0.5601
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,8192,0.9876
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,32768,3.5068
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,16384,1.8366
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,128,0.2191
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,512,1,1,65536,6.8406
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,256,0.2337
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,512,0.2856
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,2048,0.5762
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,1024,0.3815
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,4096,0.9698
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,128,0.0684
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,32768,6.5787
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,8192,1.7659
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,256,0.0666
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,512,0.0702
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1024,1,1,16384,3.3989
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,2048,0.0703
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,1024,0.0707
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,4096,0.0685
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,8192,0.0743
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,16384,0.0765
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,32768,0.0893
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,65536,0.1065
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1,1,131072,0.1252
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,128,0.0705
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,256,0.0767
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,512,0.0784
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,2048,0.0726
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,4096,0.0740
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,8192,0.0789
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,16384,0.0863
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,32768,0.1078
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,1024,0.0787
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,65536,0.1279
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1,1,131072,0.1497
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,128,0.0726
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,256,0.0745
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,512,0.0809
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,1024,0.0751
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,2048,0.0826
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,4096,0.0845
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,8192,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,16384,0.1057
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,32768,0.1193
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,65536,0.1476
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1,1,131072,0.1968
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,128,0.0777
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,256,0.0788
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,512,0.0870
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,1024,0.0825
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,2048,0.0867
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,4096,0.0891
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,8192,0.1055
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,16384,0.1217
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,32768,0.1518
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,65536,0.2043
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1,1,131072,0.3023
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,128,0.0889
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,256,0.0890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,512,0.0907
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,1024,0.0911
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,2048,0.1007
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,4096,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,8192,0.1276
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,16384,0.1543
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,32768,0.2107
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,65536,0.3114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1,1,131072,0.5080
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,256,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,128,0.0911
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,512,0.0969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,1024,0.1034
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,2048,0.1138
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,4096,0.1357
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,8192,0.1610
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,16384,0.2098
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,32768,0.3143
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,65536,0.5224
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1,1,131072,0.9334
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,128,0.0996
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,256,0.1011
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,512,0.1050
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,1024,0.1219
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,2048,0.1364
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,4096,0.1634
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,8192,0.2124
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,16384,0.3204
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,32768,0.5313
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,65536,0.9430
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1,1,131072,1.7572
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,128,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,256,0.1054
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,512,0.1219
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,1024,0.1378
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,2048,0.1648
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,4096,0.2159
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,8192,0.3232
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,16384,0.5306
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,32768,0.9326
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,65536,1.7461
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1,1,131072,3.3765
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,128,0.1258
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,256,0.1293
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,512,0.1442
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,2048,0.2275
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,1024,0.1730
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,4096,0.3255
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,16384,0.9581
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,8192,0.5419
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,32768,1.7818
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,65536,3.4057
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,128,0.1518
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,256,0.1577
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,1,1,131072,6.7049
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,512,0.1866
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,2048,0.3418
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,1024,0.2382
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,4096,0.5478
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,8192,0.9648
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,16384,1.8306
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,32768,3.4806
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,512,1,1,65536,6.7981
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,128,0.1966
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,256,0.2099
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,512,0.2606
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,2048,0.5487
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,1024,0.3557
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,4096,0.9462
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,16384,3.3704
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,8192,1.7337
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,128,0.0643
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,256,0.0622
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,512,0.0662
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,1024,0.0723
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,2048,0.0662
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,4096,0.0644
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,8192,0.0722
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,16384,0.0725
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,65536,0.1090
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,32768,0.0848
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1,1,131072,0.1146
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1024,1,1,32768,6.4684
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,128,0.0728
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,256,0.0661
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,512,0.0742
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,1024,0.0705
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,2048,0.0703
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,4096,0.0725
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,8192,0.0746
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,16384,0.0848
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,65536,0.1227
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,131072,0.1449
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1,1,32768,0.1071
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,128,0.0745
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,256,0.0724
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,512,0.0768
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,1024,0.0749
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,2048,0.0763
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,4096,0.0788
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,8192,0.0887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,16384,0.1008
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,32768,0.1180
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,65536,0.1463
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1,1,131072,0.1947
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,128,0.0767
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,256,0.0763
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,512,0.0846
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,1024,0.0827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,2048,0.0870
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,8192,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,4096,0.0851
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,32768,0.1474
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,65536,0.2066
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,16384,0.1183
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1,1,131072,0.3043
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,128,0.0848
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,256,0.0887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,512,0.0868
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,1024,0.0910
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,2048,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,4096,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,8192,0.1238
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,32768,0.2054
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,65536,0.3109
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,131072,0.5045
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1,1,16384,0.1512
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,128,0.0899
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,256,0.0885
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,512,0.0928
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,1024,0.0965
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,2048,0.1107
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,4096,0.1279
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,8192,0.1574
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,16384,0.2065
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,32768,0.3103
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,65536,0.5230
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1,1,131072,0.9297
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,128,0.0955
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,256,0.0989
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,512,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,2048,0.1371
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,1024,0.1194
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,4096,0.1566
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,8192,0.2118
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,16384,0.3170
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,32768,0.5304
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,65536,0.9370
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1,1,131072,1.7579
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,128,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,256,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,512,0.1179
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,2048,0.1613
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,1024,0.1357
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,8192,0.3208
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,16384,0.5265
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,32768,0.9342
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,4096,0.2127
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,65536,1.7424
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1,1,131072,3.3782
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,128,0.1224
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,256,0.1276
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,512,0.1403
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,2048,0.2182
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,1024,0.1679
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,4096,0.3234
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,16384,0.9572
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,8192,0.5319
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,32768,1.7666
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,65536,3.4152
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,128,0.1472
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,256,0.1520
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,512,0.1801
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,1,1,131072,6.7055
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,2048,0.3343
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,4096,0.5373
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,1024,0.2329
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,16384,1.8232
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,8192,0.9632
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,32768,3.4557
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,512,1,1,65536,6.7964
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,128,0.1859
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,256,0.1991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,2048,0.5376
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,512,0.2481
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,4096,0.9332
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,1024,0.3433
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,8192,1.7335
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,16384,3.3761
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,128,0.0582
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,256,0.0603
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,512,0.0706
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,1024,0.0684
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,2048,0.0623
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,4096,0.0644
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,8192,0.0645
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,32768,0.0805
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,16384,0.0745
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1024,1,1,32768,6.4514
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,65536,0.1064
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1,1,131072,0.1175
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,128,0.0644
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,256,0.0643
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,512,0.0724
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,1024,0.0685
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,2048,0.0724
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,4096,0.0743
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,8192,0.0729
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,16384,0.0789
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,32768,0.0993
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,65536,0.1185
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1,1,131072,0.1481
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,128,0.0683
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,256,0.0724
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,512,0.0706
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,1024,0.0725
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,2048,0.0703
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,4096,0.0807
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,16384,0.0989
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,8192,0.0786
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,32768,0.1192
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,65536,0.1415
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1,1,131072,0.1924
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,128,0.0786
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,512,0.0805
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,1024,0.0791
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,2048,0.0801
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,4096,0.0827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,8192,0.1030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,16384,0.1177
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,256,0.0726
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,32768,0.1485
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,65536,0.1956
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1,1,131072,0.2974
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,128,0.0829
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,256,0.0808
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,512,0.0846
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,1024,0.0890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,2048,0.0949
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,4096,0.1054
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,8192,0.1208
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,16384,0.1499
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,32768,0.2025
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,65536,0.3045
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1,1,131072,0.5016
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,128,0.0903
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,256,0.0889
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,512,0.0893
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,1024,0.0969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,2048,0.1012
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,4096,0.1260
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,8192,0.1549
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,16384,0.2066
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,32768,0.3098
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,65536,0.5129
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1,1,131072,0.9338
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,128,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,256,0.0928
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,2048,0.1325
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,1024,0.1134
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,4096,0.1586
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,512,0.0974
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,8192,0.2060
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,16384,0.3155
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,32768,0.5264
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,65536,0.9307
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1,1,131072,1.7577
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,128,0.0975
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,256,0.0994
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,512,0.1133
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,1024,0.1314
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,2048,0.1591
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,4096,0.2094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,8192,0.3156
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,16384,0.5239
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,32768,0.9261
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,65536,1.7422
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1,1,131072,3.3673
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,128,0.1160
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,256,0.1222
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,512,0.1378
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,1024,0.1665
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,2048,0.2159
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,4096,0.3206
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,8192,0.5256
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,16384,0.9509
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,32768,1.7702
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,65536,3.4043
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,128,0.1404
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,256,0.1460
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,1,1,131072,6.7096
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,512,0.1738
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,2048,0.3289
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,4096,0.5343
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,1024,0.2255
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,8192,0.9543
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,16384,1.8221
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,32768,3.4667
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,128,0.1791
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,256,0.1921
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,512,1,1,65536,6.8249
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,512,0.2416
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,2048,0.5294
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,4096,0.9230
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,1024,0.3339
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,8192,1.7117
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,16384,3.3670
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1024,1,1,32768,6.5139
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,512,0.1349
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,1024,0.1379
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,128,0.1287
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,2048,0.1338
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,4096,0.1399
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,8192,0.1425
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,256,0.1302
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,65536,0.1570
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,16384,0.1462
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,32768,0.1519
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1,1,1,131072,0.1607
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,128,0.1365
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,256,0.1379
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,512,0.1423
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,1024,0.1392
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,2048,0.1429
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,32768,0.1557
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,8192,0.1515
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,65536,0.1607
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,16384,0.1506
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,4096,0.1446
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,2,1,1,131072,0.1766
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,128,0.1390
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,256,0.1422
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,512,0.1443
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,1024,0.1449
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,4096,0.1525
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,8192,0.1516
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,2048,0.1462
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,16384,0.1542
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,32768,0.1628
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,65536,0.1732
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,4,1,1,131072,0.2007
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,128,0.1428
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,256,0.1439
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,1024,0.1533
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,2048,0.1547
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,512,0.1450
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,4096,0.1569
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,16384,0.1627
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,8192,0.1557
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,65536,0.2005
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,32768,0.1772
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,8,1,1,131072,0.2480
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,128,0.1537
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,256,0.1521
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,512,0.1544
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,1024,0.1627
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,16384,0.1845
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,2048,0.1641
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,32768,0.2105
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,8192,0.1718
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,4096,0.1653
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,65536,0.2632
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,16,1,1,131072,0.3702
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,128,0.1612
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,256,0.1641
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,512,0.1673
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,1024,0.1719
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,2048,0.1784
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,4096,0.1808
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,8192,0.1998
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,16384,0.2207
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,32768,0.2749
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,65536,0.3793
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,32,1,1,131072,0.5900
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,256,0.1771
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,512,0.1781
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,128,0.1756
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,1024,0.1813
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,2048,0.1864
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,4096,0.2068
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,8192,0.2347
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,16384,0.2805
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,32768,0.3855
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,65536,0.6015
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,64,1,1,131072,1.0172
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,128,0.1981
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,256,0.2013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,512,0.2040
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,1024,0.2080
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,2048,0.2334
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,4096,0.2504
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,8192,0.3042
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,16384,0.4100
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,32768,0.6262
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,65536,1.0593
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,128,1,1,131072,1.8938
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,256,0.2477
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,128,0.2612
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,512,0.2521
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,2048,0.3123
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,1024,0.2688
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,8192,0.4592
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,4096,0.3515
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,16384,0.6741
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,32768,1.1063
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,65536,1.9666
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,256,1,1,131072,3.6304
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,128,0.3914
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,256,0.3967
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,512,0.3821
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,1024,0.4080
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,2048,0.4587
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,4096,0.5598
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,8192,0.7501
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,16384,1.1323
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,32768,1.9236
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,512,1,1,65536,3.4419
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,128,0.6767
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,256,0.6267
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,512,0.6513
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,4096,1.0053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,2048,0.8075
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,16384,2.1406
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,1024,0.7628
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,8192,1.3854
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,128,1024,1,1,32768,3.7185
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,256,0.1062
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,512,0.1059
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,2048,0.1068
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,1024,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,8192,0.1133
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,128,0.1040
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,4096,0.1080
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,65536,0.1487
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,16384,0.1338
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,32768,0.1186
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1,1,1,131072,0.1342
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,512,0.1090
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,128,0.1071
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,256,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,1024,0.1148
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,2048,0.1111
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,4096,0.1195
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,8192,0.1200
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,16384,0.1218
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,32768,0.1327
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,65536,0.1379
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,2,1,1,131072,0.1525
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,128,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,256,0.1103
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,512,0.1129
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,2048,0.1152
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,1024,0.1177
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,8192,0.1279
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,16384,0.1333
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,4096,0.1224
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,32768,0.1354
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,65536,0.1494
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,4,1,1,131072,0.1702
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,128,0.1137
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,256,0.1177
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,512,0.1152
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,1024,0.1224
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,4096,0.1274
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,2048,0.1233
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,8192,0.1292
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,16384,0.1387
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,32768,0.1469
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,65536,0.1711
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,8,1,1,131072,0.2182
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,128,0.1216
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,256,0.1234
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,512,0.1244
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,1024,0.1316
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,2048,0.1320
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,4096,0.1386
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,8192,0.1437
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,16384,0.1556
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,32768,0.1783
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,65536,0.2204
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,16,1,1,131072,0.3195
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,128,0.1310
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,256,0.1339
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,512,0.1342
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,1024,0.1358
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,2048,0.1422
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,4096,0.1460
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,8192,0.1593
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,16384,0.1871
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,32768,0.2335
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,65536,0.3346
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,32,1,1,131072,0.5242
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,128,0.1346
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,256,0.1352
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,512,0.1482
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,1024,0.1480
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,2048,0.1550
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,4096,0.1668
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,8192,0.1904
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,16384,0.2406
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,32768,0.3403
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,65536,0.5384
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,64,1,1,131072,0.9230
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,128,0.1483
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,256,0.1522
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,512,0.1553
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,1024,0.1640
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,2048,0.1742
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,4096,0.2017
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,8192,0.2522
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,16384,0.3467
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,32768,0.5447
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,65536,0.9324
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,128,1,1,131072,1.6976
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,128,0.1773
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,256,0.1842
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,512,0.1918
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,1024,0.2100
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,2048,0.2246
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,4096,0.2752
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,8192,0.3751
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,16384,0.5797
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,32768,0.9751
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,65536,1.7543
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,256,1,1,131072,3.3057
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,128,0.2449
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,256,0.2505
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,512,0.2769
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,2048,0.3851
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,1024,0.3051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,4096,0.4459
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,16384,1.0491
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,8192,0.6616
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,32768,1.8380
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,512,1,1,65536,3.4055
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,128,0.3810
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,256,0.4213
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,512,0.4177
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,2048,0.5626
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,4096,0.7768
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,16384,1.8346
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,1024,0.4667
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,8192,1.1386
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,128,0.0976
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,256,0.0961
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,1024,0.1013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,2048,0.0982
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,512,0.1014
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,4096,0.1023
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,64,1024,1,1,32768,3.2887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,8192,0.1170
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,16384,0.1068
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,32768,0.1136
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,65536,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1,1,1,131072,0.1236
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,128,0.1060
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,256,0.0988
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,512,0.1037
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,1024,0.1085
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,2048,0.1070
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,4096,0.1050
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,8192,0.1145
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,32768,0.1168
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,16384,0.1141
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,131072,0.1420
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,2,1,1,65536,0.1294
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,128,0.1071
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,256,0.1026
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,1024,0.1073
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,2048,0.1094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,4096,0.1077
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,8192,0.1133
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,16384,0.1218
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,32768,0.1317
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,512,0.1112
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,65536,0.1461
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,4,1,1,131072,0.1789
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,128,0.1110
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,256,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,512,0.1087
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,1024,0.1116
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,2048,0.1157
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,4096,0.1198
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,8192,0.1279
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,16384,0.1352
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,32768,0.1488
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,65536,0.1799
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,8,1,1,131072,0.2443
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,128,0.1169
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,256,0.1168
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,512,0.1195
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,1024,0.1262
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,2048,0.1232
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,4096,0.1315
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,8192,0.1377
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,16384,0.1550
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,65536,0.2638
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,32768,0.1941
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,16,1,1,131072,0.4021
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,128,0.1272
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,256,0.1300
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,512,0.1299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,1024,0.1322
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,4096,0.1459
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,2048,0.1358
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,8192,0.1621
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,16384,0.2030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,32768,0.2694
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,65536,0.4121
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,32,1,1,131072,0.6907
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,128,0.1412
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,256,0.1371
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,512,0.1419
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,1024,0.1457
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,2048,0.1535
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,4096,0.1726
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,8192,0.2064
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,16384,0.2794
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,32768,0.4187
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,65536,0.6986
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,64,1,1,131072,1.2536
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,128,0.1625
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,256,0.1607
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,512,0.1647
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,1024,0.1765
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,2048,0.1951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,4096,0.2325
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,8192,0.2993
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,16384,0.4409
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,32768,0.7217
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,65536,1.2935
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,128,1,1,131072,2.4608
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,128,0.2036
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,256,0.2035
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,512,0.2127
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,1024,0.2338
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,2048,0.2699
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,16384,0.7553
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,4096,0.3439
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,8192,0.4817
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,32768,1.3185
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,65536,2.4752
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,256,1,1,131072,4.8625
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,128,0.2920
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,256,0.2903
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,512,0.3065
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,2048,0.4073
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,1024,0.3426
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,4096,0.5340
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,16384,1.2712
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,8192,0.7785
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,32768,2.3079
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,512,1,1,65536,4.4890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,128,0.4902
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,256,0.4895
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,512,0.5214
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,2048,0.7136
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,4096,0.9655
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,16384,2.4425
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,1024,0.5882
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,128,0.0831
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,8192,1.4544
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,256,0.0891
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,512,0.0886
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,1024,0.0891
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,2048,0.0866
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,4096,0.0926
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,8192,0.0886
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,16384,0.1001
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,65536,0.1145
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,131072,0.1209
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,32,1024,1,1,32768,4.5241
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1,1,1,32768,0.0995
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,128,0.0832
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,256,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,512,0.0908
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,1024,0.0934
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,2048,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,4096,0.0867
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,8192,0.0927
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,16384,0.1013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,32768,0.1194
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,65536,0.1279
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,2,1,1,131072,0.1340
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,128,0.0887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,256,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,512,0.0897
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,1024,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,2048,0.0887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,4096,0.0979
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,8192,0.1068
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,32768,0.1194
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,65536,0.1299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,131072,0.1559
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,4,1,1,16384,0.1088
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,128,0.0969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,256,0.0951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,512,0.0936
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,1024,0.1012
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,2048,0.1056
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,8192,0.1151
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,4096,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,16384,0.1209
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,32768,0.1360
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,65536,0.1580
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,8,1,1,131072,0.2031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,128,0.1054
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,256,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,512,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,1024,0.1044
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,2048,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,4096,0.1181
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,8192,0.1264
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,16384,0.1388
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,32768,0.1636
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,65536,0.2010
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,16,1,1,131072,0.2871
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,128,0.1072
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,256,0.1094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,512,0.1085
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,1024,0.1175
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,2048,0.1199
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,4096,0.1314
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,8192,0.1392
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,16384,0.1624
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,32768,0.2105
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,65536,0.3024
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,32,1,1,131072,0.4780
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,128,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,256,0.1200
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,512,0.1240
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,1024,0.1301
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,4096,0.1508
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,2048,0.1370
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,8192,0.1718
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,16384,0.2165
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,32768,0.3099
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,131072,0.8477
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,64,1,1,65536,0.4862
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,128,0.1316
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,256,0.1362
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,512,0.1380
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,2048,0.1562
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,4096,0.1806
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,1024,0.1468
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,8192,0.2256
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,16384,0.3173
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,32768,0.4934
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,65536,0.8525
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,128,1,1,131072,1.5622
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,128,0.1600
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,256,0.1582
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,512,0.1668
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,2048,0.2073
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,1024,0.1798
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,4096,0.2513
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,16384,0.5248
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,8192,0.3406
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,32768,0.8836
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,65536,1.5982
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,128,0.2077
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,256,1,1,131072,3.0382
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,256,0.2087
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,512,0.2224
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,2048,0.3002
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,4096,0.3899
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,1024,0.2502
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,8192,0.5710
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,16384,0.9361
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,32768,1.6653
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,512,1,1,65536,3.1224
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,128,0.3013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,256,0.3098
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,2048,0.4680
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,512,0.3358
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,4096,0.6406
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,1024,0.3795
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,8192,0.9823
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,128,0.0849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,256,0.0804
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,512,0.0785
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,16384,1.6616
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,1024,0.0825
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,2048,0.0786
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,4096,0.0872
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,8192,0.0864
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,16384,0.0866
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,32768,0.0899
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,65536,0.1030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1,1,1,131072,0.1102
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,16,1024,1,1,32768,3.0276
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,128,0.0793
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,256,0.0872
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,512,0.0889
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,1024,0.0828
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,4096,0.0890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,8192,0.0893
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,16384,0.0910
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,32768,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,65536,0.1200
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,2048,0.0808
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,2,1,1,131072,0.1238
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,256,0.0827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,128,0.0848
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,512,0.0847
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,1024,0.0844
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,2048,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,4096,0.0915
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,8192,0.0977
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,16384,0.0993
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,65536,0.1225
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,32768,0.1148
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,4,1,1,131072,0.1448
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,128,0.0887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,512,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,2048,0.0912
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,4096,0.1011
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,8192,0.0996
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,256,0.0914
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,16384,0.1121
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,1024,0.0910
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,32768,0.1299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,65536,0.1487
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,8,1,1,131072,0.1920
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,128,0.0962
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,256,0.0993
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,512,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,1024,0.0989
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,4096,0.1104
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,2048,0.1011
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,8192,0.1237
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,16384,0.1340
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,32768,0.1523
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,65536,0.1971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,16,1,1,131072,0.2841
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,128,0.1034
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,256,0.1055
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,512,0.1050
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,1024,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,2048,0.1116
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,4096,0.1247
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,8192,0.1332
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,16384,0.1575
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,32768,0.2060
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,65536,0.2951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,32,1,1,131072,0.4720
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,128,0.1093
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,256,0.1081
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,512,0.1132
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,1024,0.1170
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,2048,0.1259
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,4096,0.1430
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,8192,0.1625
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,16384,0.2073
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,32768,0.2996
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,65536,0.4768
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,64,1,1,131072,0.8359
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,128,0.1173
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,256,0.1152
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,512,0.1235
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,1024,0.1337
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,2048,0.1441
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,4096,0.1690
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,8192,0.2118
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,16384,0.3032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,32768,0.4813
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,65536,0.8358
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,128,1,1,131072,1.5496
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,128,0.1373
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,256,0.1380
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,512,0.1474
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,1024,0.1578
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,2048,0.1825
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,4096,0.2312
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,8192,0.3188
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,16384,0.5003
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,32768,0.8580
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,65536,1.5736
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,256,1,1,131072,3.0067
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,128,0.1696
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,256,0.1712
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,512,0.1834
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,2048,0.2554
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,1024,0.2081
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,4096,0.3494
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,16384,0.8933
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,8192,0.5296
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,32768,1.6238
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,512,1,1,65536,3.0787
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,128,0.2264
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,256,0.2327
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,2048,0.3908
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,512,0.2576
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,4096,0.5633
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,1024,0.3025
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,8192,0.9022
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,16384,1.5849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,128,0.0704
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,256,0.0725
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,512,0.0765
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,1024,0.0766
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,2048,0.0769
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,4096,0.0738
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,8192,0.0745
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,8,1024,1,1,32768,2.9527
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,16384,0.0805
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,32768,0.0876
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,65536,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1,1,1,131072,0.1080
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,128,0.0773
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,256,0.0846
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,512,0.0806
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,1024,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,4096,0.0827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,2048,0.0847
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,8192,0.0829
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,16384,0.0866
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,32768,0.0925
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,65536,0.1173
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,2,1,1,131072,0.1242
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,128,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,256,0.0859
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,512,0.0836
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,1024,0.0824
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,2048,0.0831
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,4096,0.0906
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,8192,0.0907
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,16384,0.0996
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,32768,0.1132
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,65536,0.1228
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,4,1,1,131072,0.1441
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,128,0.0850
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,256,0.0907
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,512,0.0866
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,1024,0.0890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,4096,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,8192,0.0950
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,16384,0.1150
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,32768,0.1245
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,65536,0.1496
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,131072,0.1894
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,8,1,1,2048,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,128,0.0976
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,256,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,512,0.0952
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,1024,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,2048,0.0993
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,4096,0.1012
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,8192,0.1180
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,16384,0.1286
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,32768,0.1523
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,65536,0.1985
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,16,1,1,131072,0.2807
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,128,0.1013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,256,0.0978
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,512,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,1024,0.1029
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,4096,0.1212
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,2048,0.1051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,8192,0.1338
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,16384,0.1561
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,32768,0.1997
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,65536,0.2915
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,32,1,1,131072,0.4677
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,128,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,256,0.1080
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,512,0.1092
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,1024,0.1104
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,2048,0.1256
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,4096,0.1358
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,8192,0.1574
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,32768,0.2956
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,16384,0.2068
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,65536,0.4759
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,64,1,1,131072,0.8311
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,128,0.1096
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,256,0.1139
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,512,0.1132
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,2048,0.1383
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,1024,0.1280
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,4096,0.1607
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,8192,0.2060
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,16384,0.2978
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,32768,0.4750
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,65536,0.8332
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,128,1,1,131072,1.5410
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,256,0.1278
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,128,0.1276
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,512,0.1353
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,2048,0.1715
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,1024,0.1484
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,4096,0.2182
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,8192,0.3099
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,16384,0.4872
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,65536,1.5652
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,32768,0.8489
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,256,1,1,131072,2.9979
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,128,0.1504
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,256,0.1514
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,512,0.1644
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,2048,0.2345
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,4096,0.3283
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,1024,0.1879
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,16384,0.8740
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,8192,0.5092
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,32768,1.6023
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,512,1,1,65536,3.0546
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,128,0.1910
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,256,0.1975
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,2048,0.3529
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,512,0.2218
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,16384,1.5449
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,4096,0.5240
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,1024,0.2658
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,8192,0.8625
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,128,0.0768
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,512,0.0710
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,256,0.0726
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,1024,0.0725
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,2048,0.0746
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,4096,0.0724
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,8192,0.0836
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,16384,0.0787
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,32768,0.0806
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,65536,0.0914
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1,1,1,131072,0.1119
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,4,1024,1,1,32768,2.9105
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,128,0.0728
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,256,0.0806
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,512,0.0744
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,1024,0.0787
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,2048,0.0808
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,4096,0.0767
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,8192,0.0849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,16384,0.0807
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,32768,0.0953
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,65536,0.1097
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,2,1,1,131072,0.1187
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,128,0.0806
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,256,0.0824
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,512,0.0868
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,1024,0.0808
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,2048,0.0848
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,4096,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,8192,0.0846
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,16384,0.0900
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,32768,0.1070
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,65536,0.1236
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,4,1,1,131072,0.1405
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,128,0.0882
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,512,0.0865
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,256,0.0911
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,1024,0.0927
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,2048,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,4096,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,8192,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,16384,0.1111
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,32768,0.1237
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,65536,0.1444
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,8,1,1,131072,0.1876
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,128,0.0912
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,256,0.0949
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,512,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,1024,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,2048,0.0990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,4096,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,8192,0.1150
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,16384,0.1259
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,32768,0.1465
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,65536,0.1944
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,16,1,1,131072,0.2781
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,128,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,512,0.0990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,256,0.0996
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,1024,0.0994
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,2048,0.0996
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,4096,0.1123
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,8192,0.1298
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,16384,0.1517
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,32768,0.2005
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,65536,0.2862
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,32,1,1,131072,0.4656
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,128,0.1051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,256,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,512,0.1050
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,1024,0.1093
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,2048,0.1201
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,4096,0.1315
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,8192,0.1564
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,16384,0.2049
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,32768,0.2938
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,65536,0.4704
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,64,1,1,131072,0.8280
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,128,0.1083
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,256,0.1090
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,512,0.1100
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,1024,0.1244
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,2048,0.1381
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,4096,0.1585
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,8192,0.2051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,16384,0.2935
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,32768,0.4725
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,65536,0.8301
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,128,1,1,131072,1.5444
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,128,0.1215
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,256,0.1203
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,512,0.1297
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,2048,0.1689
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,1024,0.1407
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,4096,0.2110
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,8192,0.3052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,32768,0.8433
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,16384,0.4832
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,65536,1.5558
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,256,1,1,131072,2.9911
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,128,0.1400
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,256,0.1433
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,512,0.1547
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,2048,0.2261
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,1024,0.1790
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,4096,0.3184
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,8192,0.4977
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,32768,1.5892
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,16384,0.8641
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,512,1,1,65536,3.0437
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,128,0.1760
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,256,0.1790
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,512,0.2024
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,2048,0.3316
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,4096,0.5038
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,1024,0.2449
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,8192,0.8450
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,16384,1.5262
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,128,0.0644
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,256,0.0685
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,512,0.0704
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,1024,0.0788
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,2048,0.0705
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,4096,0.0686
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,16384,0.0747
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,8192,0.0742
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,32768,0.0785
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,2,1024,1,1,32768,2.8904
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,131072,0.1099
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1,1,1,65536,0.0896
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,128,0.0748
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,256,0.0768
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,512,0.0723
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,1024,0.0725
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,2048,0.0747
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,4096,0.0787
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,8192,0.0849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,16384,0.0827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,32768,0.0856
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,65536,0.1044
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,2,1,1,131072,0.1218
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,128,0.0746
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,256,0.0805
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,512,0.0768
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,1024,0.0809
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,2048,0.0831
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,4096,0.0787
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,8192,0.0808
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,16384,0.0848
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,32768,0.1068
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,65536,0.1157
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,4,1,1,131072,0.1411
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,128,0.0866
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,256,0.0805
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,512,0.0890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,2048,0.0846
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,1024,0.0892
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,4096,0.0847
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,16384,0.1092
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,65536,0.1394
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,32768,0.1222
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,8192,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,8,1,1,131072,0.1827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,128,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,256,0.0886
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,512,0.0932
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,1024,0.0936
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,2048,0.0993
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,4096,0.0990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,16384,0.1220
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,8192,0.1093
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,32768,0.1449
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,65536,0.1883
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,16,1,1,131072,0.2740
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,128,0.0951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,256,0.0978
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,512,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,1024,0.0981
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,2048,0.1010
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,4096,0.1117
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,8192,0.1256
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,16384,0.1472
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,32768,0.1965
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,65536,0.2874
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,32,1,1,131072,0.4624
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,128,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,256,0.0990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,512,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,2048,0.1153
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,1024,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,4096,0.1309
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,8192,0.1515
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,16384,0.1971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,65536,0.4652
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,32768,0.2913
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,64,1,1,131072,0.8257
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,128,0.1051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,256,0.1049
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,512,0.1078
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,1024,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,2048,0.1299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,4096,0.1561
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,8192,0.2002
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,16384,0.2924
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,32768,0.4683
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,65536,0.8269
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,128,1,1,131072,1.5392
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,128,0.1153
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,256,0.1160
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,512,0.1285
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,2048,0.1670
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,1024,0.1380
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,4096,0.2084
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,8192,0.2981
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,32768,0.8370
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,16384,0.4801
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,65536,1.5540
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,256,1,1,131072,2.9942
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,128,0.1339
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,256,0.1379
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,512,0.1490
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,1024,0.1732
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,2048,0.2185
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,4096,0.3086
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,16384,0.8548
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,8192,0.4897
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,32768,1.5852
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,512,1,1,65536,3.0421
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,128,0.1648
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,256,0.1689
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,512,0.1913
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,2048,0.3226
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,4096,0.4932
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,1024,0.2355
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,8192,0.8331
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,16384,1.5133
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,float16,1,1024,1,1,32768,2.8798
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,512,0.1365
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,1024,0.1386
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,2048,0.1360
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,4096,0.1505
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,128,0.1299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,8192,0.1495
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,16384,0.1599
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,256,0.1507
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,32768,0.1525
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,65536,0.1647
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1,1,131072,0.1703
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,128,0.1421
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,256,0.1383
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,512,0.1380
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,1024,0.1423
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,2048,0.1421
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,4096,0.1483
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,8192,0.1572
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,16384,0.1577
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,32768,0.1641
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,131072,0.1936
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1,1,65536,0.1742
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,128,0.1447
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,256,0.1383
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,512,0.1412
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,1024,0.1476
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,2048,0.1606
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,4096,0.1646
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,8192,0.1502
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,16384,0.1597
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,32768,0.1708
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,65536,0.1959
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1,1,131072,0.2497
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,128,0.1426
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,256,0.1458
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,512,0.1509
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,1024,0.1559
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,2048,0.1647
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,4096,0.1537
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,8192,0.1634
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,32768,0.1955
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,65536,0.2502
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,131072,0.3450
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1,1,16384,0.1773
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,128,0.1563
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,256,0.1587
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,512,0.1599
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,1024,0.1690
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,2048,0.1667
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,4096,0.1715
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,8192,0.1831
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,16384,0.2063
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,32768,0.2610
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,65536,0.3690
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1,1,131072,0.5803
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,128,0.1621
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,256,0.1652
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,512,0.1686
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,1024,0.1746
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,2048,0.1809
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,4096,0.1964
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,8192,0.2227
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,16384,0.2728
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,32768,0.3841
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,131072,1.0114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1,1,65536,0.5878
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,128,0.1833
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,256,0.1841
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,512,0.1892
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,1024,0.1927
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,2048,0.2096
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,4096,0.2382
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,8192,0.2872
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,16384,0.3918
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,65536,1.0244
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,32768,0.6049
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1,1,131072,1.8594
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,128,0.2320
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,512,0.2464
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,256,0.2357
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,1024,0.2556
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,2048,0.2867
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,4096,0.3384
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,8192,0.4435
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,16384,0.6553
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,32768,1.0751
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,65536,1.9179
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1,1,131072,3.5954
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,128,0.3281
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,256,0.3343
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,512,0.3492
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,2048,0.4393
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,4096,0.5429
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,1024,0.3823
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,8192,0.7519
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,32768,2.0225
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,16384,1.1767
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,65536,3.7224
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,1,1,131072,7.0996
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,128,0.5147
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,256,0.5279
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,512,0.5555
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,1024,0.6055
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,2048,0.7062
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,4096,0.9000
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,32768,3.6078
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,8192,1.2866
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,16384,2.0586
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,512,1,1,65536,6.8112
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,128,0.9080
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,256,0.9324
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,512,0.9847
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,2048,1.2826
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,1024,1.0865
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,4096,1.6717
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,16384,3.9861
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,8192,2.4427
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,128,0.1155
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,256,0.1214
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,512,0.1140
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,1024,0.1172
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,2048,0.1238
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,4096,0.1301
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,16384,0.1303
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,32768,0.1441
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,65536,0.1414
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,8192,0.1291
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1024,1,1,32768,7.1051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1,1,131072,0.1545
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,128,0.1141
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,256,0.1225
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,512,0.1216
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,1024,0.1213
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,2048,0.1353
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,4096,0.1337
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,8192,0.1314
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,16384,0.1401
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,32768,0.1533
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,65536,0.1600
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1,1,131072,0.1896
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,128,0.1204
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,256,0.1179
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,512,0.1210
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,1024,0.1302
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,2048,0.1318
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,4096,0.1357
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,8192,0.1482
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,16384,0.1443
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,32768,0.1581
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,65536,0.1880
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1,1,131072,0.2388
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,128,0.1294
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,256,0.1278
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,512,0.1258
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,2048,0.1372
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,1024,0.1301
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,4096,0.1482
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,16384,0.1635
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,8192,0.1497
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,32768,0.1886
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,65536,0.2404
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1,1,131072,0.3531
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,128,0.1316
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,256,0.1317
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,512,0.1370
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,1024,0.1495
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,2048,0.1548
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,4096,0.1523
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,8192,0.1650
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,16384,0.1913
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,32768,0.2484
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,65536,0.3561
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1,1,131072,0.5623
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,128,0.1434
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,256,0.1439
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,512,0.1461
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,2048,0.1543
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,1024,0.1572
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,4096,0.1706
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,8192,0.2010
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,16384,0.2539
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,32768,0.3621
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,65536,0.5785
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1,1,131072,0.9887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,128,0.1484
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,256,0.1476
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,512,0.1608
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,1024,0.1702
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,2048,0.1824
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,4096,0.2098
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,8192,0.2620
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,16384,0.3706
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,32768,0.5967
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,65536,1.0036
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1,1,131072,1.8341
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,128,0.1729
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,256,0.1770
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,512,0.1838
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,2048,0.2265
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,4096,0.2811
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,8192,0.3851
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,16384,0.6077
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,1024,0.2008
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,32768,1.0245
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,65536,1.8505
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1,1,131072,3.5091
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,128,0.2278
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,256,0.2342
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,512,0.2528
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,1024,0.2808
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,2048,0.3344
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,4096,0.4405
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,8192,0.6587
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,16384,1.0981
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,32768,1.9296
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,65536,3.6365
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,128,0.3269
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,256,0.3454
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,1,1,131072,6.9788
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,512,0.3725
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,2048,0.5373
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,1024,0.4288
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,4096,0.7543
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,8192,1.1941
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,16384,2.0844
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,32768,3.7956
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,512,1,1,65536,7.3067
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,128,0.5472
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,256,0.5720
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,512,0.6231
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,2048,0.9267
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,1024,0.7257
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,4096,1.3371
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,16384,3.8526
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,128,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,256,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,8192,2.1651
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,512,0.1014
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1024,1,1,32768,7.1425
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,1024,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,2048,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,4096,0.1092
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,8192,0.1090
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,16384,0.1157
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,32768,0.1274
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,65536,0.1361
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1,1,131072,0.1697
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,128,0.0969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,256,0.1066
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,512,0.1054
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,1024,0.1066
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,2048,0.1113
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,4096,0.1113
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,8192,0.1154
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,16384,0.1256
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,32768,0.1425
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,65536,0.1573
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1,1,131072,0.1918
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,128,0.1076
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,256,0.1066
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,512,0.1087
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,1024,0.1173
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,2048,0.1109
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,4096,0.1180
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,8192,0.1268
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,16384,0.1404
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,32768,0.1607
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,65536,0.1913
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1,1,131072,0.2668
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,128,0.1067
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,256,0.1116
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,512,0.1152
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,1024,0.1159
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,2048,0.1214
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,4096,0.1299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,8192,0.1443
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,16384,0.1643
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,32768,0.1975
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,65536,0.2664
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1,1,131072,0.4088
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,128,0.1149
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,256,0.1153
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,512,0.1194
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,1024,0.1278
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,2048,0.1382
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,4096,0.1481
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,8192,0.1645
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,16384,0.2035
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,32768,0.2800
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,65536,0.4371
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1,1,131072,0.7481
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,128,0.1271
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,256,0.1256
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,512,0.1307
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,1024,0.1399
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,2048,0.1496
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,4096,0.1691
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,8192,0.2063
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,16384,0.2872
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,32768,0.4413
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,65536,0.7508
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1,1,131072,1.3679
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,128,0.1338
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,256,0.1362
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,512,0.1423
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,2048,0.1706
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,1024,0.1536
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,4096,0.2116
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,16384,0.4386
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,32768,0.7488
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,8192,0.2869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,65536,1.3550
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1,1,131072,2.5740
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,128,0.1531
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,256,0.1543
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,512,0.1685
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,1024,0.1866
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,2048,0.2259
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,4096,0.3025
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,8192,0.4576
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,16384,0.7690
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,32768,1.4043
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,65536,2.6894
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1,1,131072,5.2019
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,128,0.1913
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,256,0.1912
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,512,0.2139
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,2048,0.3303
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,1024,0.2540
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,4096,0.4810
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,8192,0.7827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,16384,1.4057
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,32768,2.7227
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,65536,5.4230
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,128,0.2576
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,256,0.2632
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,512,0.2998
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,1024,0.3719
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,4096,0.7840
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,1,1,131072,10.9243
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,2048,0.5131
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,16384,2.5240
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,8192,1.3293
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,32768,5.1326
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,512,1,1,65536,10.0805
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,128,0.3992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,256,0.4129
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,512,0.4831
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,2048,0.9034
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,4096,1.4440
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,1024,0.6253
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,8192,2.5359
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,16384,4.9384
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,128,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,256,0.0890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,512,0.0990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,1024,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1024,1,1,32768,10.0846
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,2048,0.1016
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,4096,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,8192,0.0968
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,16384,0.1022
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,32768,0.1193
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,65536,0.1264
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1,1,131072,0.1483
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,128,0.0913
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,256,0.0995
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,512,0.0976
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,1024,0.0942
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,2048,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,4096,0.1009
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,8192,0.1043
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,16384,0.1161
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,32768,0.1346
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,65536,0.1436
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1,1,131072,0.1688
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,128,0.0927
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,256,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,512,0.1012
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,1024,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,2048,0.0985
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,4096,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,8192,0.1135
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,16384,0.1278
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,32768,0.1371
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,65536,0.1637
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1,1,131072,0.2154
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,128,0.0990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,256,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,512,0.1030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,1024,0.1097
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,2048,0.1124
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,4096,0.1131
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,8192,0.1283
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,16384,0.1439
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,32768,0.1726
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,65536,0.2302
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1,1,131072,0.3315
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,128,0.1094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,256,0.1092
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,512,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,1024,0.1136
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,2048,0.1239
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,4096,0.1350
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,8192,0.1466
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,16384,0.1766
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,32768,0.2285
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,65536,0.3330
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1,1,131072,0.5290
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,128,0.1155
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,256,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,512,0.1146
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,1024,0.1260
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,2048,0.1336
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,4096,0.1506
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,8192,0.1759
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,16384,0.2329
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,32768,0.3389
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,65536,0.5451
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1,1,131072,0.9548
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,128,0.1155
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,256,0.1216
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,512,0.1246
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,1024,0.1422
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,2048,0.1588
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,4096,0.1853
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,8192,0.2337
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,16384,0.3399
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,65536,0.9609
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,32768,0.5559
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1,1,131072,1.7795
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,128,0.1298
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,256,0.1309
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,512,0.1439
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,1024,0.1583
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,2048,0.1852
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,4096,0.2427
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,16384,0.5555
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,8192,0.3449
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,32768,0.9566
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,65536,1.7787
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1,1,131072,3.4149
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,128,0.1550
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,256,0.1615
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,512,0.1750
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,1024,0.2030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,2048,0.2563
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,4096,0.3611
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,16384,0.9816
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,32768,1.8175
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,8192,0.5661
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,65536,3.4605
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,1,1,131072,6.7485
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,128,0.1970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,256,0.2040
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,512,0.2347
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,2048,0.3881
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,1024,0.2890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,4096,0.5968
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,8192,1.0194
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,16384,1.8708
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,32768,3.5239
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,512,1,1,65536,6.8643
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,128,0.2816
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,256,0.2959
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,512,0.3496
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,2048,0.6411
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,4096,1.0346
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,16384,3.4765
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,1024,0.4457
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,8192,1.8443
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,128,0.0918
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1024,1,1,32768,6.6006
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,256,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,1024,0.0887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,512,0.0890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,2048,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,4096,0.0937
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,8192,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,16384,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,32768,0.1145
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,65536,0.1305
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1,1,131072,0.1362
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,128,0.0926
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,256,0.0977
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,512,0.0970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,1024,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,2048,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,4096,0.0953
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,8192,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,16384,0.1107
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,32768,0.1288
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,65536,0.1463
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1,1,131072,0.1698
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,128,0.0889
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,256,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,512,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,1024,0.0982
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,2048,0.1013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,4096,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,8192,0.1128
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,16384,0.1237
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,32768,0.1362
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,65536,0.1585
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1,1,131072,0.2194
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,128,0.1035
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,512,0.1034
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,1024,0.0976
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,2048,0.1016
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,4096,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,8192,0.1242
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,256,0.0970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,16384,0.1437
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,32768,0.1712
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,65536,0.2255
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1,1,131072,0.3170
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,128,0.1091
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,256,0.1031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,512,0.1030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,1024,0.1111
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,2048,0.1198
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,4096,0.1257
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,8192,0.1445
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,16384,0.1707
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,32768,0.2284
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,65536,0.3279
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1,1,131072,0.5215
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,128,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,256,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,512,0.1094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,1024,0.1195
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,2048,0.1299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,4096,0.1498
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,8192,0.1712
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,16384,0.2231
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,32768,0.3305
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,65536,0.5414
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1,1,131072,0.9516
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,128,0.1116
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,256,0.1147
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,512,0.1218
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,2048,0.1485
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,1024,0.1340
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,4096,0.1752
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,8192,0.2284
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,16384,0.3344
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,32768,0.5457
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,65536,0.9524
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1,1,131072,1.7683
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,128,0.1183
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,256,0.1273
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,512,0.1357
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,1024,0.1517
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,2048,0.1794
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,4096,0.2317
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,8192,0.3384
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,16384,0.5434
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,32768,0.9477
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,65536,1.7581
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1,1,131072,3.3979
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,128,0.1405
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,256,0.1491
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,2048,0.2410
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,512,0.1630
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,1024,0.1919
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,4096,0.3447
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,8192,0.5565
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,16384,0.9689
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,32768,1.7924
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,65536,3.4423
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,128,0.1773
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,256,0.1825
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,1,1,131072,6.7456
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,512,0.2104
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,1024,0.2607
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,2048,0.3661
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,4096,0.5694
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,8192,0.9969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,16384,1.8557
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,32768,3.4832
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,512,1,1,65536,6.8560
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,128,0.2330
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,256,0.2489
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,512,0.2996
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,2048,0.5924
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,1024,0.3953
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,4096,0.9906
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,16384,3.4154
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,8192,1.7960
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,128,0.0887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,256,0.0852
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,512,0.0904
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,1024,0.0910
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,2048,0.0912
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,8192,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,4096,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,16384,0.0908
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,32768,0.1083
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,65536,0.1255
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1,1,131072,0.1394
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1024,1,1,32768,6.5794
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,128,0.0949
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,256,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,512,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,1024,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,2048,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,4096,0.0889
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,8192,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,16384,0.1092
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,32768,0.1222
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,65536,0.1414
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1,1,131072,0.1610
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,128,0.0880
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,256,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,512,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,1024,0.1010
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,2048,0.0928
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,8192,0.0993
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,16384,0.1227
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,32768,0.1422
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,65536,0.1685
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,131072,0.2131
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1,1,4096,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,128,0.1010
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,256,0.0947
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,512,0.0958
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,2048,0.1014
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,4096,0.1113
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,8192,0.1181
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,16384,0.1436
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,32768,0.1635
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,65536,0.2251
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,1024,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1,1,131072,0.3146
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,128,0.1012
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,256,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,512,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,1024,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,2048,0.1084
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,4096,0.1261
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,8192,0.1425
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,16384,0.1679
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,32768,0.2221
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,65536,0.3272
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1,1,131072,0.5228
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,128,0.1050
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,256,0.1031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,512,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,2048,0.1299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,1024,0.1140
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,4096,0.1435
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,8192,0.1720
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,16384,0.2231
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,32768,0.3256
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,65536,0.5335
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1,1,131072,0.9464
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,128,0.1132
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,256,0.1096
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,512,0.1154
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,1024,0.1329
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,2048,0.1461
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,4096,0.1749
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,8192,0.2258
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,16384,0.3299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,32768,0.5426
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,65536,0.9537
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1,1,131072,1.7728
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,128,0.1139
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,256,0.1197
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,512,0.1299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,1024,0.1474
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,2048,0.1775
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,4096,0.2271
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,8192,0.3285
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,16384,0.5421
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,32768,0.9471
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,65536,1.7604
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1,1,131072,3.3826
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,128,0.1321
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,256,0.1419
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,512,0.1580
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,2048,0.2343
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,1024,0.1827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,4096,0.3412
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,16384,0.9758
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,8192,0.5472
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,32768,1.7809
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,65536,3.4192
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,1,1,131072,6.7174
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,128,0.1649
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,256,0.1707
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,512,0.2007
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,1024,0.2532
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,2048,0.3513
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,4096,0.5605
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,8192,0.9765
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,32768,3.4852
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,16384,1.8322
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,512,1,1,65536,6.8047
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,128,0.2138
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,256,0.2283
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,512,0.2798
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,2048,0.5683
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,1024,0.3707
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,4096,0.9608
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,16384,3.3899
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,8192,1.7584
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,128,0.0809
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,256,0.0806
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,512,0.0870
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,1024,0.0910
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,2048,0.0849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,4096,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,8192,0.0932
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,32768,0.1010
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,65536,0.1203
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,131072,0.1391
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1024,1,1,32768,6.5609
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1,1,16384,0.0951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,128,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,256,0.0916
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,512,0.0908
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,2048,0.0887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,4096,0.0953
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,8192,0.0895
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,1024,0.0886
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,16384,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,32768,0.1121
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,65536,0.1429
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1,1,131072,0.1624
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,128,0.0893
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,256,0.0913
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,512,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,1024,0.0990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,2048,0.0973
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,4096,0.0928
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,8192,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,16384,0.1175
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,32768,0.1381
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,65536,0.1643
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1,1,131072,0.2111
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,128,0.0995
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,256,0.0996
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,512,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,1024,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,2048,0.1012
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,4096,0.1055
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,8192,0.1207
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,16384,0.1399
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,32768,0.1603
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,65536,0.2139
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1,1,131072,0.3140
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,128,0.0979
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,256,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,512,0.0990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,1024,0.1096
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,2048,0.1135
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,4096,0.1211
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,8192,0.1436
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,16384,0.1675
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,32768,0.2172
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,65536,0.3201
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1,1,131072,0.5291
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,128,0.1030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,256,0.1031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,512,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,1024,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,2048,0.1238
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,4096,0.1421
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,8192,0.1724
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,16384,0.2187
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,32768,0.3280
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,65536,0.5375
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1,1,131072,0.9427
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,128,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,256,0.1060
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,512,0.1128
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,1024,0.1295
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,2048,0.1439
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,4096,0.1687
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,8192,0.2273
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,16384,0.3274
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,32768,0.5423
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,65536,0.9449
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1,1,131072,1.7763
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,128,0.1154
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,256,0.1153
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,512,0.1257
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,1024,0.1478
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,2048,0.1724
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,4096,0.2261
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,8192,0.3291
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,16384,0.5427
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,32768,0.9379
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,65536,1.7608
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1,1,131072,3.3743
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,128,0.1299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,256,0.1382
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,2048,0.2294
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,512,0.1545
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,4096,0.3357
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,1024,0.1813
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,16384,0.9735
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,8192,0.5506
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,32768,1.7800
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,65536,3.4294
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,128,0.1583
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,1,1,131072,6.7177
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,256,0.1666
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,512,0.1963
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,4096,0.5533
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,1024,0.2454
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,8192,0.9747
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,2048,0.3476
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,16384,1.8311
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,32768,3.4830
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,512,1,1,65536,6.8100
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,128,0.2056
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,256,0.2197
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,512,0.2686
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,2048,0.5555
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,16384,3.3830
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,1024,0.3621
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,4096,0.9487
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,8192,1.7548
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1024,1,1,32768,6.5087
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,256,0.0865
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,1024,0.0845
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,2048,0.0897
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,4096,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,128,0.0786
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,8192,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,16384,0.0955
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,32768,0.1071
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,65536,0.1182
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,131072,0.1316
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,128,0.0874
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,256,0.0866
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,512,0.0849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,1024,0.0849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1,1,512,0.0889
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,2048,0.0889
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,4096,0.0879
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,8192,0.0939
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,16384,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,65536,0.1373
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,32768,0.1164
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1,1,131072,0.1632
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,128,0.0846
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,256,0.0831
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,512,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,1024,0.0928
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,2048,0.0938
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,4096,0.0989
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,8192,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,16384,0.1059
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,32768,0.1354
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,65536,0.1545
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1,1,131072,0.2067
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,128,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,256,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,512,0.0934
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,1024,0.0937
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,2048,0.0967
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,4096,0.1031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,16384,0.1361
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,8192,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,32768,0.1618
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,65536,0.2144
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1,1,131072,0.3106
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,128,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,256,0.1015
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,512,0.0970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,1024,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,2048,0.1097
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,4096,0.1136
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,8192,0.1385
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,16384,0.1627
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,32768,0.2207
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,131072,0.5160
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1,1,65536,0.3193
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,128,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,256,0.1030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,512,0.1010
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,2048,0.1139
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,4096,0.1387
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,1024,0.1058
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,8192,0.1643
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,16384,0.2209
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,32768,0.3220
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,65536,0.5353
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1,1,131072,0.9461
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,128,0.1054
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,256,0.1035
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,512,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,1024,0.1259
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,2048,0.1414
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,4096,0.1638
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,8192,0.2181
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,16384,0.3233
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,32768,0.5350
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,65536,0.9467
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1,1,131072,1.7593
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,128,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,256,0.1093
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,512,0.1254
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,1024,0.1422
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,2048,0.1664
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,4096,0.2252
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,8192,0.3262
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,16384,0.5368
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,32768,0.9421
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,65536,1.7524
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1,1,131072,3.3748
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,128,0.1226
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,256,0.1311
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,512,0.1515
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,1024,0.1743
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,2048,0.2258
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,4096,0.3302
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,16384,0.9732
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,8192,0.5463
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,32768,1.7771
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,65536,3.4030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,128,0.1543
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,256,0.1619
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,512,0.1891
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,1,1,131072,6.7008
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,2048,0.3419
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,1024,0.2395
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,4096,0.5500
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,8192,0.9688
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,16384,1.8320
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,32768,3.4729
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,128,0.1971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,512,1,1,65536,6.8323
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,256,0.2136
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,512,0.2596
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,2048,0.5482
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,4096,0.9406
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,1024,0.3552
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,8192,1.7447
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,128,0.1273
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,256,0.1370
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,16384,3.3867
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,512,0.1359
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1024,1,1,32768,6.4787
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,1024,0.1380
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,2048,0.1317
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,4096,0.1357
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,8192,0.1376
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,16384,0.1537
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,32768,0.1559
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,65536,0.1511
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1,1,1,131072,0.1646
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,128,0.1336
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,256,0.1317
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,512,0.1322
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,1024,0.1345
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,2048,0.1361
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,4096,0.1446
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,8192,0.1487
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,16384,0.1540
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,32768,0.1532
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,131072,0.1688
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,2,1,1,65536,0.1544
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,128,0.1373
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,256,0.1410
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,512,0.1331
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,1024,0.1384
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,2048,0.1452
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,4096,0.1497
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,16384,0.1485
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,32768,0.1592
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,8192,0.1514
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,65536,0.1738
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,4,1,1,131072,0.1916
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,128,0.1360
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,256,0.1390
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,512,0.1351
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,1024,0.1446
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,4096,0.1445
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,2048,0.1463
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,8192,0.1547
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,16384,0.1580
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,32768,0.1708
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,65536,0.1914
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,8,1,1,131072,0.2452
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,128,0.1442
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,256,0.1502
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,512,0.1439
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,1024,0.1533
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,2048,0.1555
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,4096,0.1543
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,8192,0.1616
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,16384,0.1790
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,32768,0.2026
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,65536,0.2600
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,16,1,1,131072,0.3591
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,128,0.1514
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,512,0.1588
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,256,0.1500
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,1024,0.1572
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,2048,0.1611
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,4096,0.1728
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,8192,0.1884
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,16384,0.2104
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,32768,0.2629
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,65536,0.3650
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,32,1,1,131072,0.5817
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,128,0.1599
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,256,0.1665
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,512,0.1699
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,2048,0.1740
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,1024,0.1667
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,4096,0.1892
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,8192,0.2139
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,16384,0.2675
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,32768,0.3763
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,65536,0.5916
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,64,1,1,131072,1.0041
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,128,0.1972
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,256,0.2022
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,512,0.1974
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,2048,0.2158
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,1024,0.2017
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,4096,0.2431
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,8192,0.2981
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,16384,0.4055
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,65536,1.0446
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,32768,0.6285
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,128,1,1,131072,1.8874
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,128,0.2506
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,256,0.2535
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,2048,0.3031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,512,0.2606
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,1024,0.2751
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,4096,0.3728
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,8192,0.4656
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,16384,0.6970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,32768,1.1129
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,65536,1.9552
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,256,1,1,131072,3.6388
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,128,0.3455
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,256,0.3788
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,512,0.3648
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,2048,0.4440
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,1024,0.3914
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,4096,0.5432
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,16384,1.1168
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,8192,0.7642
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,32768,1.8757
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,512,1,1,65536,3.4248
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,128,0.5812
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,256,0.5895
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,512,0.6152
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,2048,0.7723
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,4096,0.9687
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,1024,0.6675
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,8192,1.4104
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,32768,3.6459
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,16384,2.1679
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,128,0.1094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,256,0.1083
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,512,0.1139
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,2048,0.1135
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,1024,0.1271
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,4096,0.1141
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,8192,0.1318
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,32768,0.1271
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,16384,0.1257
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,65536,0.1314
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1,1,1,131072,0.1410
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,128,0.1131
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,256,0.1185
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,512,0.1139
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,1024,0.1135
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,4096,0.1255
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,8192,0.1272
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,16384,0.1268
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,32768,0.1374
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,65536,0.1421
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,2048,0.1189
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,2,1,1,131072,0.1582
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,128,0.1203
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,256,0.1183
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,512,0.1156
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,1024,0.1182
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,2048,0.1188
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,4096,0.1278
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,8192,0.1277
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,16384,0.1380
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,32768,0.1441
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,65536,0.1544
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,4,1,1,131072,0.1731
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,128,0.1198
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,256,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,512,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,1024,0.1221
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,2048,0.1295
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,8192,0.1326
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,4096,0.1339
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,16384,0.1438
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,32768,0.1520
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,65536,0.1744
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,8,1,1,131072,0.2196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,128,0.1259
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,256,0.1257
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,512,0.1313
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,2048,0.1340
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,4096,0.1371
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,8192,0.1458
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,1024,0.1318
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,16384,0.1560
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,32768,0.1811
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,65536,0.2251
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,16,1,1,131072,0.3193
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,128,0.1313
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,256,0.1297
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,512,0.1320
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,1024,0.1369
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,2048,0.1432
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,4096,0.1503
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,8192,0.1588
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,16384,0.1855
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,32768,0.2343
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,65536,0.3306
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,32,1,1,131072,0.5256
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,128,0.1330
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,512,0.1461
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,256,0.1383
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,2048,0.1534
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,1024,0.1463
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,4096,0.1671
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,8192,0.1902
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,16384,0.2405
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,32768,0.3412
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,65536,0.5355
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,64,1,1,131072,0.9195
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,128,0.1502
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,256,0.1513
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,512,0.1566
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,1024,0.1633
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,2048,0.1769
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,4096,0.2050
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,8192,0.2511
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,16384,0.3455
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,32768,0.5458
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,65536,0.9335
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,128,1,1,131072,1.7055
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,128,0.1830
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,256,0.1859
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,512,0.1926
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,2048,0.2353
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,1024,0.2134
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,4096,0.2899
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,8192,0.3837
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,16384,0.5938
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,32768,0.9784
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,65536,1.7558
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,256,1,1,131072,3.2980
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,128,0.2570
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,256,0.2639
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,512,0.2616
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,2048,0.3433
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,4096,0.4442
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,1024,0.3036
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,16384,1.0468
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,8192,0.6452
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,32768,1.8472
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,512,1,1,65536,3.4161
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,128,0.3646
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,256,0.3799
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16384,1.8208
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,512,0.4313
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,2048,0.5765
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4096,0.7329
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,1024,0.4535
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8192,1.1230
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32768,3.2710
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,128,0.1112
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,256,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,512,0.1029
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,1024,0.1136
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,2048,0.1075
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,4096,0.1078
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,8192,0.1122
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,16384,0.1222
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,32768,0.1291
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,65536,0.1421
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1,1,1,131072,0.1412
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,128,0.1049
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,256,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,512,0.1158
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,1024,0.1132
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,2048,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,4096,0.1203
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,8192,0.1174
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,16384,0.1213
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,32768,0.1296
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,65536,0.1393
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,2,1,1,131072,0.1607
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,128,0.1162
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,256,0.1118
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,512,0.1133
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,1024,0.1116
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,2048,0.1216
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,4096,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,8192,0.1298
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,16384,0.1341
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,32768,0.1390
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,65536,0.1627
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,4,1,1,131072,0.1890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,128,0.1194
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,256,0.1173
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,1024,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,2048,0.1273
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,4096,0.1291
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,8192,0.1357
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,512,0.1213
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,16384,0.1428
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,32768,0.1665
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,65536,0.1937
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,8,1,1,131072,0.2578
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,128,0.1214
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,256,0.1270
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,512,0.1275
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,1024,0.1290
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,2048,0.1298
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,4096,0.1378
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,8192,0.1494
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,16384,0.1662
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,32768,0.2017
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,65536,0.2765
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,16,1,1,131072,0.4142
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,128,0.1281
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,256,0.1299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,512,0.1320
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,1024,0.1397
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,2048,0.1459
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,4096,0.1574
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,8192,0.1708
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,16384,0.2098
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,32768,0.2771
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,65536,0.4217
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,32,1,1,131072,0.6983
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,128,0.1413
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,256,0.1419
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,512,0.1473
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,1024,0.1520
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,2048,0.1606
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,4096,0.1776
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,8192,0.2149
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,16384,0.2852
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,32768,0.4231
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,65536,0.7045
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,64,1,1,131072,1.2565
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,128,0.1686
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,256,0.1688
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,512,0.1731
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,2048,0.1992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,1024,0.1811
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,4096,0.2385
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,8192,0.3064
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,16384,0.4482
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,32768,0.7300
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,65536,1.2984
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,128,1,1,131072,2.4728
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,128,0.2111
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,256,0.2121
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,512,0.2221
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,2048,0.2793
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,4096,0.3509
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,1024,0.2425
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,8192,0.4872
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,16384,0.7651
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,32768,1.3223
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,65536,2.4877
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,128,0.2958
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,256,1,1,131072,4.8805
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,256,0.2968
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,512,0.3140
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,2048,0.4127
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,1024,0.3473
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,4096,0.5394
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,8192,0.7850
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,16384,1.2807
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,32768,2.3059
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,512,1,1,65536,4.5117
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,128,0.4952
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,256,0.4948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,512,0.5261
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,2048,0.7198
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4096,0.9710
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,1024,0.5917
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8192,1.4588
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,128,0.1012
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,256,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16384,2.4535
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,512,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,1024,0.1011
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,2048,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,4096,0.1092
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,8192,0.1098
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,16384,0.1073
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32768,4.5067
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,32768,0.1133
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,65536,0.1269
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1,1,1,131072,0.1319
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,128,0.1057
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,256,0.1073
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,512,0.1054
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,1024,0.1011
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,2048,0.1014
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,4096,0.1029
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,8192,0.1071
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,16384,0.1154
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,32768,0.1266
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,65536,0.1352
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,2,1,1,131072,0.1497
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,128,0.1013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,256,0.1010
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,512,0.1091
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,1024,0.1051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,2048,0.1059
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,4096,0.1165
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,8192,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,16384,0.1181
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,32768,0.1265
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,65536,0.1454
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,4,1,1,131072,0.1644
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,128,0.1134
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,256,0.1100
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,512,0.1092
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,1024,0.1160
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,2048,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,4096,0.1229
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,8192,0.1199
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,16384,0.1380
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,32768,0.1469
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,65536,0.1675
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,8,1,1,131072,0.2098
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,128,0.1134
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,256,0.1133
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,512,0.1169
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,1024,0.1214
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,2048,0.1255
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,4096,0.1285
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,8192,0.1351
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,16384,0.1502
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,32768,0.1744
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,65536,0.2149
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,16,1,1,131072,0.3022
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,128,0.1217
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,256,0.1173
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,512,0.1224
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,2048,0.1275
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,4096,0.1371
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,8192,0.1544
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,16384,0.1789
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,32768,0.2226
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,1024,0.1238
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,65536,0.3099
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,32,1,1,131072,0.4916
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,128,0.1282
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,256,0.1255
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,512,0.1289
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,1024,0.1373
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,2048,0.1477
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,4096,0.1585
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,8192,0.1821
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,16384,0.2294
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,32768,0.3155
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,65536,0.4960
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,64,1,1,131072,0.8538
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,128,0.1387
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,256,0.1422
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,512,0.1482
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,2048,0.1653
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,1024,0.1528
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,4096,0.1888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,8192,0.2360
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,16384,0.3236
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,32768,0.5031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,65536,0.8622
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,128,1,1,131072,1.5738
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,128,0.1681
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,256,0.1719
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,512,0.1759
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,2048,0.2156
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,1024,0.1909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,4096,0.2616
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,8192,0.3535
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,16384,0.5330
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,32768,0.8908
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,65536,1.6096
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,256,1,1,131072,3.0464
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,128,0.2136
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,256,0.2181
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,2048,0.3054
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,512,0.2322
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,4096,0.3985
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,1024,0.2568
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,16384,0.9456
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,8192,0.5823
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,32768,1.6753
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,512,1,1,65536,3.1311
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,128,0.3140
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,256,0.3195
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,512,0.3460
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,2048,0.4790
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4096,0.6517
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,1024,0.3908
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16384,1.6754
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8192,0.9929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,128,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32768,3.0408
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,256,0.1010
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,512,0.0944
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,2048,0.1036
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,4096,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,1024,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,8192,0.0982
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,16384,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,32768,0.1070
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,65536,0.1174
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1,1,1,131072,0.1319
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,128,0.0970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,256,0.1030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,512,0.1013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,1024,0.1008
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,2048,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,4096,0.1009
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,8192,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,16384,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,32768,0.1139
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,65536,0.1283
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,2,1,1,131072,0.1391
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,128,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,256,0.1013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,512,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,1024,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,2048,0.0988
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,4096,0.1071
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,8192,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,16384,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,32768,0.1265
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,65536,0.1443
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,4,1,1,131072,0.1608
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,128,0.1085
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,256,0.1093
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,1024,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,512,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,2048,0.1063
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,4096,0.1176
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,8192,0.1155
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,16384,0.1299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,32768,0.1407
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,65536,0.1627
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,8,1,1,131072,0.2053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,128,0.1154
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,256,0.1094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,512,0.1178
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,1024,0.1144
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,2048,0.1152
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,4096,0.1217
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,8192,0.1361
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,16384,0.1502
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,32768,0.1669
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,65536,0.2125
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,16,1,1,131072,0.2985
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,128,0.1156
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,256,0.1175
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,512,0.1176
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,1024,0.1215
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,2048,0.1271
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,4096,0.1332
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,8192,0.1472
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,16384,0.1688
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,32768,0.2158
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,65536,0.3062
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,32,1,1,131072,0.4835
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,128,0.1201
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,256,0.1220
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,512,0.1227
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,1024,0.1275
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,2048,0.1370
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,4096,0.1513
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,8192,0.1735
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,16384,0.2225
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,32768,0.3107
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,65536,0.4879
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,64,1,1,131072,0.8470
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,128,0.1263
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,256,0.1292
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,512,0.1333
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,1024,0.1418
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,2048,0.1544
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,4096,0.1793
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,8192,0.2232
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,16384,0.3129
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,32768,0.4935
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,65536,0.8499
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,128,1,1,131072,1.5562
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,128,0.1444
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,256,0.1467
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,512,0.1575
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,1024,0.1689
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,2048,0.1933
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,4096,0.2390
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,8192,0.3289
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,32768,0.8671
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,65536,1.5860
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,16384,0.5093
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,256,1,1,131072,3.0229
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,128,0.1776
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,256,0.1810
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,512,0.1977
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,2048,0.2677
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,4096,0.3602
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,16384,0.9070
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,8192,0.5436
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,1024,0.2204
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,32768,1.6340
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,512,1,1,65536,3.0901
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,128,0.2395
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,256,0.2459
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,2048,0.4063
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,512,0.2721
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4096,0.5776
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,1024,0.3181
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8192,0.9186
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,128,0.0868
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,256,0.0989
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,512,0.0955
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16384,1.6007
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,1024,0.0984
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,2048,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,4096,0.0910
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,8192,0.1009
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,16384,0.1051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32768,2.9645
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,32768,0.1073
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,65536,0.1151
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1,1,1,131072,0.1214
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,128,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,256,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,512,0.0968
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,1024,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,2048,0.1049
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,4096,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,8192,0.1093
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,16384,0.1001
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,32768,0.1135
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,65536,0.1258
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,2,1,1,131072,0.1387
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,128,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,256,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,512,0.1090
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,1024,0.0961
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,2048,0.1041
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,4096,0.1012
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,8192,0.1035
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,16384,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,32768,0.1238
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,65536,0.1422
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,4,1,1,131072,0.1659
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,256,0.1034
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,512,0.1094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,1024,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,128,0.1096
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,2048,0.1050
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,4096,0.1066
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,16384,0.1288
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,8192,0.1135
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,32768,0.1389
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,65536,0.1669
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,8,1,1,131072,0.2101
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,128,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,256,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,512,0.1135
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,1024,0.1117
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,2048,0.1117
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,4096,0.1217
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,8192,0.1296
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,16384,0.1401
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,32768,0.1668
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,65536,0.2069
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,16,1,1,131072,0.2951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,128,0.1133
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,256,0.1156
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,512,0.1173
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,1024,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,2048,0.1193
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,4096,0.1297
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,8192,0.1480
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,16384,0.1664
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,32768,0.2141
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,65536,0.3067
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,32,1,1,131072,0.4815
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,128,0.1217
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,256,0.1155
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,512,0.1214
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,1024,0.1237
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,2048,0.1337
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,4096,0.1465
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,8192,0.1716
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,16384,0.2154
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,32768,0.3097
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,65536,0.4859
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,64,1,1,131072,0.8425
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,128,0.1216
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,256,0.1203
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,512,0.1252
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,1024,0.1370
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,2048,0.1511
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,4096,0.1722
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,8192,0.2213
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,16384,0.3073
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,32768,0.4867
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,65536,0.8427
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,128,1,1,131072,1.5548
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,128,0.1341
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,256,0.1356
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,512,0.1477
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,1024,0.1605
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,2048,0.1852
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,4096,0.2301
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,8192,0.3198
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,16384,0.4986
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,32768,0.8578
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,65536,1.5741
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,256,1,1,131072,3.0093
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,128,0.1608
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,256,0.1657
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,2048,0.2487
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,512,0.1767
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,1024,0.2028
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,4096,0.3404
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,16384,0.8853
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,8192,0.5257
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,32768,1.6164
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,512,1,1,65536,3.0728
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,128,0.2096
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,256,0.2141
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,512,0.2417
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,2048,0.3718
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,4096,0.5443
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,1024,0.2836
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,128,0.0949
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,256,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,512,0.0890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,1024,0.0972
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,2048,0.0898
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,4096,0.0989
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,8192,0.0972
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,32768,2.9301
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,16384,0.0965
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,32768,0.1071
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,8192,0.8830
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,16384,1.5674
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,65536,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1,1,1,131072,0.1295
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,128,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,256,0.0889
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,512,0.0885
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,1024,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,2048,0.0973
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,4096,0.1015
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,16384,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,8192,0.0933
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,32768,0.1022
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,65536,0.1258
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,2,1,1,131072,0.1332
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,128,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,256,0.0927
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,512,0.1040
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,1024,0.0954
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,2048,0.0951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,4096,0.1060
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,8192,0.1100
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,16384,0.1081
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,32768,0.1159
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,65536,0.1319
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,4,1,1,131072,0.1610
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,128,0.1071
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,256,0.1071
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,512,0.1041
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,2048,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,4096,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,8192,0.1093
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,1024,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,16384,0.1265
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,32768,0.1373
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,65536,0.1592
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,8,1,1,131072,0.2022
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,128,0.1054
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,256,0.1154
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,512,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,1024,0.1135
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,2048,0.1094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,4096,0.1175
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,8192,0.1234
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,16384,0.1393
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,32768,0.1610
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,65536,0.2131
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,16,1,1,131072,0.2925
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,128,0.1137
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,256,0.1138
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,512,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,1024,0.1128
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,2048,0.1166
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,4096,0.1251
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,8192,0.1473
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,16384,0.1665
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,32768,0.2077
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,65536,0.3026
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,32,1,1,131072,0.4771
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,128,0.1154
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,256,0.1134
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,512,0.1155
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,1024,0.1216
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,2048,0.1323
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,4096,0.1483
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,8192,0.1658
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,16384,0.2122
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,32768,0.3031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,65536,0.4813
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,64,1,1,131072,0.8410
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,128,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,256,0.1235
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,512,0.1238
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,2048,0.1498
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,1024,0.1334
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,4096,0.1704
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,8192,0.2159
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,16384,0.3050
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,32768,0.4870
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,65536,0.8434
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,128,1,1,131072,1.5492
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,128,0.1295
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,256,0.1321
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,512,0.1434
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,2048,0.1801
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,4096,0.2244
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,8192,0.3163
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,16384,0.4926
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,1024,0.1541
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,32768,0.8525
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,65536,1.5719
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,256,1,1,131072,3.0056
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,128,0.1524
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,256,0.1562
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,512,0.1706
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,2048,0.2400
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,4096,0.3299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,1024,0.1938
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,8192,0.5142
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,32768,1.6048
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,16384,0.8755
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,512,1,1,65536,3.0590
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,128,0.1935
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,256,0.1963
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,512,0.2191
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,2048,0.3514
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,4096,0.5223
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,1024,0.2657
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,8192,0.8634
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,256,0.0868
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,128,0.0847
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,512,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,16384,1.5449
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,1024,0.0967
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,2048,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,4096,0.0972
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,8192,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,16384,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,32768,0.0986
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,32768,2.9104
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,65536,0.1124
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1,1,1,131072,0.1234
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,128,0.0870
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,256,0.0892
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,512,0.0983
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,1024,0.0883
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,2048,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,4096,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,8192,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,16384,0.1011
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,32768,0.1013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,65536,0.1192
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,2,1,1,131072,0.1343
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,128,0.0969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,256,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,512,0.0911
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,1024,0.0990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,2048,0.0953
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,4096,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,8192,0.0970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,16384,0.1054
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,32768,0.1128
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,65536,0.1296
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,4,1,1,131072,0.1540
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,128,0.0969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,256,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,512,0.1060
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,1024,0.1002
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,2048,0.1071
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,4096,0.1081
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,8192,0.1051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,16384,0.1184
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,32768,0.1386
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,65536,0.1569
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,8,1,1,131072,0.1988
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,128,0.1073
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,256,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,512,0.1051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,1024,0.1094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,2048,0.1116
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,4096,0.1164
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,8192,0.1215
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,16384,0.1414
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,32768,0.1586
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,65536,0.2022
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,16,1,1,131072,0.2898
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,128,0.1093
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,256,0.1120
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,512,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,1024,0.1090
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,2048,0.1156
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,4096,0.1210
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,8192,0.1377
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,16384,0.1616
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,32768,0.2115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,65536,0.2986
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,32,1,1,131072,0.4728
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,128,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,256,0.1132
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,512,0.1112
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,1024,0.1134
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,2048,0.1230
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,4096,0.1448
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,8192,0.1636
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,16384,0.2103
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,32768,0.2999
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,65536,0.4768
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,64,1,1,131072,0.8342
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,128,0.1139
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,256,0.1174
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,512,0.1184
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,1024,0.1316
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,2048,0.1430
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,4096,0.1671
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,8192,0.2139
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,16384,0.3011
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,32768,0.4821
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,65536,0.8361
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,128,1,1,131072,1.5533
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,128,0.1258
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,256,0.1259
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,512,0.1373
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,1024,0.1536
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,2048,0.1745
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,4096,0.2223
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,8192,0.3115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,16384,0.4910
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,65536,1.5643
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,32768,0.8507
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,256,1,1,131072,3.0002
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,128,0.1477
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,256,0.1506
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,512,0.1619
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,1024,0.1880
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,2048,0.2326
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,4096,0.3250
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,8192,0.5092
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,16384,0.8693
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,32768,1.6007
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,512,1,1,65536,3.0519
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,128,0.1852
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,256,0.1888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,2048,0.3428
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,512,0.2124
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,4096,0.5145
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,1024,0.2536
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,8192,0.8519
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,128,0.1146
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,256,0.1217
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,512,0.1241
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,16384,1.5322
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,32768,2.8968
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,1024,0.1241
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,2048,0.1233
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,4096,0.1275
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,8192,0.1396
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,16384,0.1456
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,32768,0.1440
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,65536,0.1472
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1,1,131072,0.1564
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,128,0.1265
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,256,0.1210
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,512,0.1298
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,2048,0.1319
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,1024,0.1302
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,4096,0.1355
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,8192,0.1437
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,16384,0.1385
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,32768,0.1487
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,65536,0.1619
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1,1,131072,0.1849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,128,0.1298
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,256,0.1257
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,512,0.1317
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,1024,0.1341
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,2048,0.1380
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,4096,0.1472
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,8192,0.1417
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,16384,0.1500
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,32768,0.1585
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,65536,0.1825
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1,1,131072,0.2364
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,128,0.1317
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,256,0.1360
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,1024,0.1466
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,2048,0.1530
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,4096,0.1418
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,8192,0.1484
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,512,0.1401
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,16384,0.1644
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,32768,0.1860
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,65536,0.2330
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1,1,131072,0.3344
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,128,0.1467
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,256,0.1480
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,512,0.1491
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,1024,0.1565
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,2048,0.1516
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,4096,0.1560
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,8192,0.1710
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,16384,0.2022
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,32768,0.2535
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,65536,0.3578
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1,1,131072,0.5691
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,128,0.1509
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,256,0.1506
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,512,0.1553
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,2048,0.1673
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,4096,0.1857
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,8192,0.2077
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,16384,0.2654
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,1024,0.1630
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,32768,0.3685
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,65536,0.5775
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1,1,131072,0.9952
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,128,0.1708
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,256,0.1721
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,512,0.1750
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,1024,0.1796
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,2048,0.1929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,4096,0.2230
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,8192,0.2730
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,16384,0.3775
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,32768,0.5911
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,65536,1.0070
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1,1,131072,1.8480
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,128,0.2070
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,256,0.2132
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,512,0.2193
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,1024,0.2355
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,2048,0.2626
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,4096,0.3191
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,8192,0.4258
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,16384,0.6309
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,32768,1.0517
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,65536,1.8921
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1,1,131072,3.5754
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,128,0.2888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,256,0.2942
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,512,0.3101
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,1024,0.3404
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,2048,0.3958
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,4096,0.5033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,8192,0.7116
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,16384,1.1345
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,32768,1.9832
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,65536,3.6720
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,1,1,131072,7.0674
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,128,0.4683
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,256,0.4849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,512,0.5096
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,2048,0.6636
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,1024,0.5621
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,4096,0.8583
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,8192,1.2437
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,16384,2.0129
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,32768,3.5630
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,512,1,1,65536,6.7044
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,128,0.8232
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,256,0.8454
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,2048,1.1953
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,512,0.8981
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,1024,1.0002
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,4096,1.5833
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,16384,3.9020
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,8192,2.3573
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,128,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,512,0.0908
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,256,0.0973
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,1024,0.0990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,2048,0.0949
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,4096,0.0961
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,8192,0.1040
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,16384,0.1234
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,32768,0.1341
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,65536,0.1336
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1,1,131072,0.1499
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1024,1,1,32768,7.0077
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,128,0.0990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,256,0.0926
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,512,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,1024,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,2048,0.1116
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,4096,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,8192,0.1187
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,16384,0.1285
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,32768,0.1405
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,65536,0.1499
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1,1,131072,0.1806
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,128,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,512,0.1050
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,1024,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,256,0.0993
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,2048,0.1108
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,4096,0.1195
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,8192,0.1336
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,16384,0.1340
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,32768,0.1515
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,65536,0.1755
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1,1,131072,0.2328
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,128,0.1034
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,256,0.1013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,512,0.1133
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,1024,0.1149
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,2048,0.1276
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,4096,0.1380
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,8192,0.1389
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,16384,0.1509
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,32768,0.1827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,65536,0.2282
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1,1,131072,0.3392
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,128,0.1149
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,256,0.1173
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,512,0.1209
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,1024,0.1321
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,2048,0.1438
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,4096,0.1442
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,8192,0.1606
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,16384,0.1873
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,32768,0.2411
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,65536,0.3400
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1,1,131072,0.5564
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,128,0.1237
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,256,0.1298
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,512,0.1381
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,1024,0.1483
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,2048,0.1466
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,4096,0.1631
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,8192,0.1907
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,16384,0.2455
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,32768,0.3511
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,65536,0.5651
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1,1,131072,0.9853
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,128,0.1340
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,256,0.1399
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,512,0.1524
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,1024,0.1569
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,2048,0.1728
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,4096,0.2003
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,8192,0.2553
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,16384,0.3592
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,32768,0.5765
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,65536,1.0013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1,1,131072,1.8205
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,128,0.1598
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,256,0.1642
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,512,0.1731
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,1024,0.1859
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,2048,0.2122
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,4096,0.2693
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,8192,0.3729
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,16384,0.5899
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,32768,0.9973
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,65536,1.8363
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1,1,131072,3.4965
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,128,0.2022
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,256,0.2103
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,512,0.2266
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,2048,0.3101
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,4096,0.4155
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,1024,0.2585
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,8192,0.6298
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,16384,1.0636
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,32768,1.9142
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,65536,3.5997
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,128,0.2976
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,256,0.3134
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,512,0.3410
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,1,1,131072,6.9709
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,2048,0.5067
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,1024,0.3989
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,4096,0.7263
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,16384,2.0695
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,8192,1.1685
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,32768,3.7627
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,512,1,1,65536,7.2565
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,128,0.4955
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,256,0.5224
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,512,0.5722
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,2048,0.8802
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,1024,0.6752
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,4096,1.2815
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,16384,3.8261
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,8192,2.1267
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,128,0.0847
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,256,0.0894
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,512,0.0847
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1024,1,1,32768,7.0135
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,1024,0.0868
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,2048,0.0950
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,4096,0.0896
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,8192,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,16384,0.0985
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,32768,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,65536,0.1259
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1,1,131072,0.1528
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,128,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,256,0.0846
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,512,0.0896
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,1024,0.0877
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,2048,0.0934
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,8192,0.0990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,16384,0.1030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,32768,0.1324
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,65536,0.1469
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,131072,0.1791
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1,1,4096,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,128,0.0877
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,256,0.0953
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,512,0.0969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,1024,0.0957
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,2048,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,4096,0.1001
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,8192,0.1143
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,16384,0.1322
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,32768,0.1480
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,65536,0.1827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1,1,131072,0.2587
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,128,0.1003
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,256,0.0962
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,512,0.0984
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,2048,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,4096,0.1084
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,8192,0.1355
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,16384,0.1528
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,1024,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,32768,0.1877
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,65536,0.2593
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1,1,131072,0.3984
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,128,0.1026
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,256,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,512,0.1082
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,1024,0.1081
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,2048,0.1183
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,4096,0.1368
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,8192,0.1557
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,16384,0.1952
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,32768,0.2726
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,65536,0.4271
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1,1,131072,0.7401
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,128,0.1082
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,256,0.1077
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,512,0.1107
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,1024,0.1301
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,2048,0.1421
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,4096,0.1628
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,8192,0.1997
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,16384,0.2787
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,32768,0.4334
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,65536,0.7436
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1,1,131072,1.3627
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,128,0.1175
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,256,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,512,0.1332
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,1024,0.1447
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,2048,0.1642
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,4096,0.2012
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,8192,0.2798
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,16384,0.4317
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,32768,0.7358
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,65536,1.3528
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1,1,131072,2.5701
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,128,0.1404
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,256,0.1440
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,512,0.1587
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,2048,0.2142
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,1024,0.1777
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,4096,0.2952
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,8192,0.4448
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,32768,1.3883
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,16384,0.7498
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,65536,2.7216
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1,1,131072,5.2849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,128,0.1744
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,256,0.1764
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,512,0.1997
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,1024,0.2387
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,2048,0.3167
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,4096,0.4657
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,8192,0.7679
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,16384,1.3912
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,32768,2.7060
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,65536,5.3885
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,128,0.2355
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,256,0.2420
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,512,0.2805
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,1,1,131072,10.7509
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,1024,0.3520
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,2048,0.4911
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,4096,0.7636
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,8192,1.3070
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,16384,2.5119
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,32768,5.0641
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,512,1,1,65536,10.1464
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,128,0.3665
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,256,0.3750
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,2048,0.8756
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,512,0.4516
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,1024,0.5940
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,4096,1.4145
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,8192,2.4982
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,16384,4.9196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,128,0.0807
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,256,0.0806
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,512,0.0808
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,1024,0.0868
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,2048,0.0847
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,4096,0.0868
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1024,1,1,32768,10.0745
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,8192,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,16384,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,32768,0.1068
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,65536,0.1161
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1,1,131072,0.1318
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,128,0.0886
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,256,0.0826
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,512,0.0907
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,1024,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,2048,0.0850
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,4096,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,8192,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,16384,0.1031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,32768,0.1066
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,65536,0.1408
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1,1,131072,0.1645
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,128,0.0886
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,256,0.0849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,512,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,1024,0.0970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,2048,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,4096,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,8192,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,16384,0.1094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,32768,0.1316
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,65536,0.1567
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1,1,131072,0.2179
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,128,0.0975
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,256,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,512,0.0924
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,1024,0.1010
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,4096,0.0993
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,8192,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,16384,0.1397
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,2048,0.1004
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,32768,0.1613
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,65536,0.2185
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1,1,131072,0.3188
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,128,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,256,0.0972
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,512,0.1043
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,1024,0.1071
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,2048,0.1070
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,4096,0.1134
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,8192,0.1383
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,16384,0.1699
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,32768,0.2258
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,65536,0.3238
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1,1,131072,0.5265
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,128,0.1031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,256,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,512,0.1098
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,1024,0.1108
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,2048,0.1236
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,4096,0.1485
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,8192,0.1742
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,16384,0.2201
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,32768,0.3287
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,65536,0.5440
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1,1,131072,0.9518
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,128,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,256,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,512,0.1154
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,1024,0.1278
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,2048,0.1499
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,4096,0.1756
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,8192,0.2308
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,16384,0.3361
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,32768,0.5469
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,65536,0.9555
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1,1,131072,1.7739
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,128,0.1177
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,256,0.1197
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,512,0.1351
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,2048,0.1791
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,1024,0.1538
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,4096,0.2286
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,8192,0.3340
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,16384,0.5381
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,32768,0.9521
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,65536,1.7711
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1,1,131072,3.4035
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,128,0.1411
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,256,0.1479
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,512,0.1641
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,1024,0.1916
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,2048,0.2404
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,4096,0.3468
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,16384,0.9753
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,8192,0.5563
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,32768,1.7957
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,65536,3.4396
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,128,0.1810
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,1,1,131072,6.7620
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,256,0.1887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,2048,0.3746
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,512,0.2212
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,4096,0.5782
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,1024,0.2710
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,8192,1.0071
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,16384,1.8498
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,32768,3.5211
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,128,0.2581
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,512,1,1,65536,6.8766
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,256,0.2725
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,2048,0.6159
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,512,0.3244
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,4096,1.0096
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,1024,0.4222
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,8192,1.8228
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,128,0.0807
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,256,0.0751
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,512,0.0807
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,1024,0.0866
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,2048,0.0809
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,4096,0.0804
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,8192,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,32768,6.5852
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1024,1,1,16384,3.4396
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,16384,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,32768,0.0986
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,65536,0.1171
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1,1,131072,0.1275
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,128,0.0868
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,256,0.0784
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,512,0.0828
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,1024,0.0827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,2048,0.0829
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,4096,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,8192,0.0951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,16384,0.0973
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,32768,0.1042
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,65536,0.1363
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1,1,131072,0.1587
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,128,0.0807
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,256,0.0908
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,512,0.0853
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,1024,0.0950
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,2048,0.0928
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,4096,0.0890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,8192,0.0911
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,16384,0.1041
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,32768,0.1296
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,65536,0.1618
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1,1,131072,0.2107
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,128,0.0868
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,256,0.0870
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,512,0.0965
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,1024,0.0950
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,2048,0.0951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,4096,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,8192,0.1096
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,16384,0.1308
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,32768,0.1608
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,65536,0.2107
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1,1,131072,0.3148
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,128,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,256,0.1010
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,512,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,1024,0.0981
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,2048,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,4096,0.1110
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,8192,0.1378
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,16384,0.1679
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,32768,0.2221
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,65536,0.3254
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1,1,131072,0.5176
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,128,0.1014
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,256,0.0993
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,512,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,1024,0.1050
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,2048,0.1131
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,4096,0.1431
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,8192,0.1682
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,16384,0.2214
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,32768,0.3206
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,65536,0.5354
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1,1,131072,0.9427
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,128,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,256,0.1056
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,512,0.1113
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,1024,0.1217
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,4096,0.1691
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,8192,0.2213
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,16384,0.3284
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,2048,0.1426
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,32768,0.5397
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,65536,0.9441
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1,1,131072,1.7631
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,128,0.1134
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,256,0.1111
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,512,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,2048,0.1703
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,1024,0.1462
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,4096,0.2243
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,16384,0.5386
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,8192,0.3293
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,32768,0.9441
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,65536,1.7557
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1,1,131072,3.3766
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,128,0.1235
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,256,0.1326
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,2048,0.2321
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,512,0.1523
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,1024,0.1800
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,4096,0.3378
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,8192,0.5436
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,16384,0.9697
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,32768,1.7713
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,65536,3.4147
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,128,0.1601
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,1,1,131072,6.7236
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,256,0.1695
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,2048,0.3500
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,512,0.1953
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,4096,0.5565
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,1024,0.2464
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,16384,1.8422
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,8192,0.9854
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,32768,3.4691
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,512,1,1,65536,6.8425
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,128,0.2138
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,256,0.2299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,512,0.2793
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,2048,0.5675
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,1024,0.3749
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,4096,0.9691
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,16384,3.4089
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,8192,1.7639
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,128,0.0742
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,256,0.0769
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,512,0.0767
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,1024,0.0851
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,8192,0.0805
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,16384,0.0844
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,32768,0.0951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,65536,0.1077
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,2048,0.0788
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,4096,0.0865
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1,1,131072,0.1328
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1024,1,1,32768,6.4893
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,128,0.0827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,256,0.0785
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,512,0.0809
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,1024,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,4096,0.0827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,8192,0.0870
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,32768,0.0993
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,65536,0.1377
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,131072,0.1594
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,2048,0.0892
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1,1,16384,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,128,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,256,0.0871
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,512,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,1024,0.0830
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,4096,0.0907
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,8192,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,16384,0.1071
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,32768,0.1318
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,65536,0.1605
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,2048,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1,1,131072,0.2051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,128,0.0870
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,256,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,512,0.0887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,2048,0.0950
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,1024,0.0892
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,4096,0.1031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,8192,0.1071
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,16384,0.1357
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,32768,0.1569
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,65536,0.2147
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1,1,131072,0.3155
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,128,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,256,0.0970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,512,0.0994
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,1024,0.0969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,2048,0.1018
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,4096,0.1051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,8192,0.1322
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,16384,0.1632
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,32768,0.2156
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,65536,0.3183
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1,1,131072,0.5152
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,128,0.1011
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,256,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,512,0.1050
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,1024,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,2048,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,4096,0.1427
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,8192,0.1635
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,16384,0.2202
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,32768,0.3204
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,65536,0.5303
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1,1,131072,0.9459
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,128,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,256,0.1031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,512,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,2048,0.1428
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,1024,0.1124
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,4096,0.1678
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,8192,0.2213
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,16384,0.3241
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,32768,0.5385
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,65536,0.9427
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1,1,131072,1.7673
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,128,0.1073
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,256,0.1086
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,512,0.1169
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,2048,0.1679
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,1024,0.1429
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,4096,0.2207
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,8192,0.3232
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,32768,0.9365
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,16384,0.5377
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,65536,1.7481
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1,1,131072,3.3852
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,128,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,256,0.1267
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,512,0.1462
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,2048,0.2247
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,1024,0.1733
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,4096,0.3287
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,16384,0.9671
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,8192,0.5387
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,32768,1.7797
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,65536,3.4160
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,1,1,131072,6.7267
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,128,0.1487
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,256,0.1569
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,2048,0.3396
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,512,0.1883
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,4096,0.5470
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,1024,0.2408
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,8192,0.9736
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,16384,1.8260
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,32768,3.4767
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,512,1,1,65536,6.8248
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,128,0.1958
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,256,0.2132
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,512,0.2616
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,2048,0.5506
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,1024,0.3587
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,4096,0.9492
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,8192,1.7518
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,16384,3.3743
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,128,0.0801
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,256,0.0712
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,1024,0.0770
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,2048,0.0847
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1024,1,1,32768,6.5148
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,4096,0.0766
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,512,0.0827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,8192,0.0784
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,16384,0.0829
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,32768,0.0969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,65536,0.1119
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1,1,131072,0.1318
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,128,0.0827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,256,0.0769
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,512,0.0867
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,1024,0.0826
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,2048,0.0853
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,4096,0.0805
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,8192,0.0889
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,16384,0.0872
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,32768,0.0950
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,65536,0.1312
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1,1,131072,0.1588
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,128,0.0841
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,256,0.0808
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,512,0.0832
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,1024,0.0849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,2048,0.0907
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,4096,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,8192,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,16384,0.0990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,32768,0.1277
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,65536,0.1529
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1,1,131072,0.2025
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,128,0.0848
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,256,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,512,0.0940
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,1024,0.0970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,2048,0.0952
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,4096,0.0940
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,8192,0.1039
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,16384,0.1294
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,32768,0.1623
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,65536,0.2089
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1,1,131072,0.3111
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,128,0.0951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,256,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,512,0.0932
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,1024,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,2048,0.0999
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,4096,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,8192,0.1358
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,16384,0.1581
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,32768,0.2166
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,65536,0.3186
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1,1,131072,0.5130
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,128,0.0951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,256,0.1006
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,512,0.1056
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,1024,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,2048,0.1145
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,4096,0.1360
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,8192,0.1611
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,16384,0.2122
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,32768,0.3219
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,65536,0.5311
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1,1,131072,0.9402
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,128,0.1034
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,256,0.1040
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,512,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,1024,0.1132
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,2048,0.1400
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,4096,0.1691
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,8192,0.2153
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,16384,0.3244
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,32768,0.5337
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,65536,0.9443
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1,1,131072,1.7618
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,128,0.1056
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,256,0.1094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,512,0.1172
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,2048,0.1647
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,1024,0.1388
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,4096,0.2210
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,8192,0.3232
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,16384,0.5329
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,32768,0.9355
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,65536,1.7570
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1,1,131072,3.3763
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,128,0.1178
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,256,0.1227
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,512,0.1442
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,1024,0.1732
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,2048,0.2228
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,4096,0.3264
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,8192,0.5350
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,16384,0.9574
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,32768,1.7681
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,65536,3.4249
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,128,0.1442
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,1,1,131072,6.7113
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,256,0.1544
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,512,0.1806
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,2048,0.3317
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,1024,0.2341
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,4096,0.5434
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,8192,0.9596
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,16384,1.8169
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,32768,3.4537
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,512,1,1,65536,6.8335
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,128,0.1875
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,256,0.2029
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,2048,0.5408
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,512,0.2517
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,1024,0.3453
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,4096,0.9311
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,16384,3.3798
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,8192,1.7397
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,128,0.0786
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,256,0.0725
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,512,0.0804
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,1024,0.0766
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,2048,0.0802
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,4096,0.0764
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,8192,0.0765
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,16384,0.0889
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,32768,0.0908
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,65536,0.1060
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1024,1,1,32768,6.5225
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1,1,131072,0.1305
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,128,0.0805
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,256,0.0766
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,512,0.0848
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,1024,0.0785
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,2048,0.0835
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,4096,0.0784
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,8192,0.0830
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,16384,0.0859
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,32768,0.0950
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,65536,0.1322
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1,1,131072,0.1529
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,128,0.0776
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,256,0.0787
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,512,0.0806
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,1024,0.0865
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,2048,0.0828
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,4096,0.0923
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,8192,0.0913
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,16384,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,32768,0.1254
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,65536,0.1529
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1,1,131072,0.2001
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,128,0.0893
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,256,0.0911
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,512,0.0865
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,1024,0.0866
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,2048,0.0889
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,4096,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,8192,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,16384,0.1310
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,32768,0.1573
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,65536,0.2143
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1,1,131072,0.3077
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,128,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,256,0.0911
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,512,0.0961
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,1024,0.0931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,2048,0.1031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,8192,0.1343
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,16384,0.1557
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,32768,0.2124
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,4096,0.1029
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,65536,0.3151
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1,1,131072,0.5169
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,128,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,256,0.0953
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,512,0.1004
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,1024,0.0989
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,2048,0.1011
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,4096,0.1319
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,8192,0.1601
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,16384,0.2141
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,32768,0.3125
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,65536,0.5271
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1,1,131072,0.9395
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,128,0.0969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,256,0.0970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,512,0.0998
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,1024,0.1054
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,2048,0.1394
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,4096,0.1633
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,8192,0.2142
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,16384,0.3175
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,32768,0.5277
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,65536,0.9399
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1,1,131072,1.7561
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,128,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,256,0.1030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,512,0.1126
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,1024,0.1388
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,2048,0.1615
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,4096,0.2122
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,8192,0.3224
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,16384,0.5301
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,32768,0.9360
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,65536,1.7496
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1,1,131072,3.3740
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,128,0.1133
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,256,0.1168
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,512,0.1398
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,2048,0.2177
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,1024,0.1682
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,4096,0.3244
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,8192,0.5302
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,16384,0.9451
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,32768,1.7683
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,65536,3.4024
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,128,0.1401
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,256,0.1511
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,1,1,131072,6.7153
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,512,0.1746
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,1024,0.2285
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,2048,0.3296
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,4096,0.5364
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,8192,0.9566
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,16384,1.8204
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,32768,3.4482
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,512,1,1,65536,6.8203
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,128,0.1823
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,256,0.1953
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,512,0.2460
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,2048,0.5319
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,1024,0.3396
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,4096,0.9273
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,16384,3.3654
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,8192,1.7280
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1024,1,1,32768,6.5217
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,128,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,256,0.1163
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,512,0.1151
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,1024,0.1155
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,2048,0.1234
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,4096,0.1193
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,8192,0.1230
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,16384,0.1302
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,32768,0.1405
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,65536,0.1399
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1,1,1,131072,0.1424
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,128,0.1226
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,256,0.1173
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,512,0.1224
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,1024,0.1223
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,2048,0.1223
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,4096,0.1254
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,8192,0.1316
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,16384,0.1368
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,65536,0.1430
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,32768,0.1421
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,2,1,1,131072,0.1572
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,128,0.1217
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,256,0.1255
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,512,0.1217
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,1024,0.1281
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,2048,0.1320
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,4096,0.1319
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,8192,0.1318
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,16384,0.1359
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,32768,0.1436
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,65536,0.1603
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,4,1,1,131072,0.1842
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,128,0.1297
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,256,0.1257
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,512,0.1318
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,1024,0.1338
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,2048,0.1360
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,4096,0.1338
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,8192,0.1371
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,16384,0.1445
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,32768,0.1615
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,65536,0.1801
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,8,1,1,131072,0.2353
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,128,0.1323
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,256,0.1343
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,512,0.1387
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,1024,0.1380
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,2048,0.1400
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,4096,0.1503
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,8192,0.1526
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,16384,0.1661
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,32768,0.1930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,65536,0.2477
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,16,1,1,131072,0.3479
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,128,0.1390
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,256,0.1387
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,512,0.1502
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,1024,0.1521
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,2048,0.1487
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,4096,0.1564
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,16384,0.1995
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,8192,0.1710
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,32768,0.2517
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,65536,0.3545
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,32,1,1,131072,0.5724
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,128,0.1461
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,256,0.1539
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,512,0.1525
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,2048,0.1651
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,1024,0.1544
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,8192,0.2003
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,16384,0.2529
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,32768,0.3648
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,65536,0.5711
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,4096,0.1729
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,64,1,1,131072,0.9900
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,128,0.1678
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,256,0.1688
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,512,0.1732
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,1024,0.1896
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,2048,0.2032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,4096,0.2241
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,8192,0.2751
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,16384,0.3796
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,32768,0.5959
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,65536,1.0230
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,128,1,1,131072,1.8681
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,128,0.2097
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,256,0.2116
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,512,0.2342
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,2048,0.2633
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,4096,0.3180
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,1024,0.2486
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,8192,0.4235
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,16384,0.6407
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,32768,1.0712
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,65536,1.9148
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,256,1,1,131072,3.6132
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,128,0.2999
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,256,0.3064
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,512,0.3206
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,2048,0.3984
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,4096,0.4987
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,1024,0.3759
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,8192,0.6912
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,16384,1.1006
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,32768,1.8317
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,512,1,1,65536,3.3499
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,128,0.4971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,256,0.5036
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,2048,0.6861
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,512,0.5887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,16384,2.0205
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,4096,0.8845
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,1024,0.6429
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,8192,1.2679
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,128,0.0890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,256,0.0871
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,1024,0.0949
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,512,0.0983
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,2048,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,4096,0.1037
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,8192,0.0972
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,32768,3.5396
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,16384,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,32768,0.1137
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,65536,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1,1,1,131072,0.1341
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,128,0.0982
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,512,0.0954
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,1024,0.0972
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,256,0.0921
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,2048,0.0970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,4096,0.1051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,8192,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,16384,0.1190
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,32768,0.1237
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,65536,0.1357
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,2,1,1,131072,0.1449
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,128,0.1000
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,256,0.0950
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,512,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,1024,0.1045
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,2048,0.1038
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,4096,0.1101
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,8192,0.1192
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,16384,0.1238
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,32768,0.1299
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,65536,0.1449
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,4,1,1,131072,0.1641
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,128,0.1012
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,256,0.1013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,512,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,1024,0.1056
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,2048,0.1094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,4096,0.1153
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,8192,0.1213
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,16384,0.1346
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,32768,0.1462
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,65536,0.1647
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,8,1,1,131072,0.2118
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,128,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,512,0.1133
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,256,0.1061
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,1024,0.1139
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,2048,0.1231
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,4096,0.1342
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,8192,0.1385
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,16384,0.1491
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,32768,0.1687
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,65536,0.2144
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,16,1,1,131072,0.3109
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,128,0.1198
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,256,0.1136
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,512,0.1197
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,1024,0.1272
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,2048,0.1313
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,4096,0.1390
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,16384,0.1748
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,8192,0.1524
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,65536,0.3227
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,32768,0.2290
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,32,1,1,131072,0.5179
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,128,0.1228
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,256,0.1219
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,512,0.1347
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,1024,0.1384
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,2048,0.1429
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,4096,0.1584
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,8192,0.1858
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,16384,0.2322
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,32768,0.3280
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,65536,0.5257
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,64,1,1,131072,0.9085
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,128,0.1399
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,256,0.1380
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,512,0.1468
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,2048,0.1612
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,4096,0.1889
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,8192,0.2364
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,16384,0.3356
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,1024,0.1498
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,32768,0.5337
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,65536,0.9237
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,128,1,1,131072,1.6917
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,128,0.1585
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,256,0.1609
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,512,0.1760
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,2048,0.2118
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,4096,0.2650
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,1024,0.1834
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,8192,0.3679
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,16384,0.5608
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,32768,0.9563
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,65536,1.7302
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,256,1,1,131072,3.2769
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,128,0.2251
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,256,0.2198
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,512,0.2435
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,2048,0.3138
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,1024,0.2577
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,4096,0.4134
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,16384,1.0304
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,8192,0.6156
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,32768,1.8195
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,512,1,1,65536,3.3725
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,128,0.3449
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,256,0.3292
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,512,0.3799
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,2048,0.4990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,1024,0.4039
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,4096,0.7114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,16384,1.7727
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,8192,1.0470
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,128,0.0886
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,256,0.0975
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,512,0.0907
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,1024,0.0950
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,2048,0.0950
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,4096,0.1030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,8192,0.0951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,16384,0.1084
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,32768,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,32768,3.2508
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,65536,0.1152
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1,1,1,131072,0.1339
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,128,0.0982
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,256,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,512,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,1024,0.0949
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,2048,0.1034
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,4096,0.1011
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,8192,0.1002
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,16384,0.1092
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,32768,0.1117
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,65536,0.1288
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,2,1,1,131072,0.1499
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,128,0.0968
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,256,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,512,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,1024,0.0989
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,2048,0.1067
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,4096,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,8192,0.1050
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,16384,0.1238
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,32768,0.1318
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,65536,0.1508
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,4,1,1,131072,0.1836
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,128,0.1030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,256,0.1029
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,512,0.1091
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,2048,0.1136
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,4096,0.1153
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,8192,0.1205
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,16384,0.1360
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,32768,0.1537
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,1024,0.1075
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,65536,0.1828
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,8,1,1,131072,0.2516
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,128,0.1091
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,256,0.1124
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,512,0.1106
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,1024,0.1174
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,2048,0.1156
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,4096,0.1300
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,8192,0.1381
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,16384,0.1598
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,32768,0.1919
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,65536,0.2644
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,16,1,1,131072,0.4056
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,128,0.1216
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,256,0.1157
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,512,0.1194
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,1024,0.1234
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,2048,0.1349
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,4096,0.1453
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,8192,0.1670
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,16384,0.1971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,32768,0.2732
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,65536,0.4100
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,32,1,1,131072,0.6924
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,128,0.1259
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,256,0.1291
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,512,0.1300
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,1024,0.1445
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,2048,0.1552
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,4096,0.1702
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,16384,0.2747
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,32768,0.4163
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,65536,0.6980
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,8192,0.2089
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,64,1,1,131072,1.2544
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,128,0.1521
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,256,0.1565
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,512,0.1625
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,2048,0.1929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,1024,0.1709
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,4096,0.2294
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,16384,0.4370
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,8192,0.2986
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,32768,0.7183
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,65536,1.2853
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,128,1,1,131072,2.4527
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,128,0.1946
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,256,0.1947
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,512,0.2066
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,2048,0.2632
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,1024,0.2253
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,4096,0.3354
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,8192,0.4734
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,16384,0.7510
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,32768,1.3074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,65536,2.4723
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,256,1,1,131072,4.8878
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,128,0.2738
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,256,0.2739
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,512,0.2931
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,2048,0.3945
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,1024,0.3272
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,4096,0.5177
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,8192,0.7669
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,32768,2.2835
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,16384,1.2562
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,512,1,1,65536,4.5034
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,128,0.4617
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,256,0.4622
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,512,0.4941
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,2048,0.6859
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,1024,0.5574
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,4096,0.9387
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,8192,1.4279
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,128,0.0868
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,32768,4.4684
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,256,0.0872
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,512,0.0887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,1024,0.0972
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,2048,0.0907
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,16384,2.4111
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,4096,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,8192,0.1011
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,16384,0.0950
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,32768,0.1072
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,65536,0.1116
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1,1,1,131072,0.1216
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,128,0.0896
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,256,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,512,0.0989
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,1024,0.0919
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,2048,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,4096,0.0990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,8192,0.1031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,16384,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,32768,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,65536,0.1243
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,2,1,1,131072,0.1384
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,128,0.0951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,256,0.0989
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,512,0.1002
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,1024,0.1040
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,2048,0.0953
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,4096,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,8192,0.1012
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,16384,0.1031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,32768,0.1178
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,65536,0.1377
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,4,1,1,131072,0.1640
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,128,0.0989
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,256,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,512,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,1024,0.1076
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,2048,0.1030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,4096,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,8192,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,16384,0.1203
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,32768,0.1418
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,65536,0.1650
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,8,1,1,131072,0.2091
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,128,0.1096
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,256,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,512,0.1072
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,1024,0.1094
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,2048,0.1134
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,4096,0.1136
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,8192,0.1311
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,16384,0.1462
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,32768,0.1689
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,65536,0.2109
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,16,1,1,131072,0.2925
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,128,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,256,0.1117
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,512,0.1175
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,1024,0.1177
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,2048,0.1194
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,4096,0.1279
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,16384,0.1688
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,32768,0.2171
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,65536,0.3030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,8192,0.1462
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,32,1,1,131072,0.4852
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,128,0.1175
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,256,0.1181
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,512,0.1175
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,1024,0.1233
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,2048,0.1380
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,4096,0.1503
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,8192,0.1746
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,16384,0.2223
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,32768,0.3100
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,65536,0.4891
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,64,1,1,131072,0.8475
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,128,0.1277
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,256,0.1280
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,512,0.1323
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,1024,0.1462
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,2048,0.1570
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,4096,0.1844
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,8192,0.2246
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,16384,0.3173
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,32768,0.4967
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,65536,0.8536
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,128,1,1,131072,1.5637
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,128,0.1511
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,256,0.1567
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,512,0.1647
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,1024,0.1787
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,2048,0.2050
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,4096,0.2488
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,8192,0.3419
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,16384,0.5183
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,32768,0.8819
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,65536,1.5967
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,256,1,1,131072,3.0321
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,128,0.1994
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,256,0.2034
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,512,0.2143
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,1024,0.2399
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,2048,0.2899
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,4096,0.3839
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,8192,0.5637
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,16384,0.9286
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,32768,1.6555
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,512,1,1,65536,3.1185
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,128,0.2881
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,256,0.2935
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,512,0.3224
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,2048,0.4554
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,16384,1.6495
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,4096,0.6278
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,1024,0.3673
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,8192,0.9696
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,32768,3.0173
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,128,0.0858
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,256,0.0848
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,512,0.0910
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,1024,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,2048,0.0934
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,4096,0.0891
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,8192,0.0872
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,16384,0.0934
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,32768,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,65536,0.1090
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1,1,1,131072,0.1159
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,128,0.0886
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,256,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,512,0.0890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,1024,0.0965
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,2048,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,4096,0.0927
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,8192,0.0995
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,16384,0.0973
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,32768,0.1010
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,65536,0.1127
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,2,1,1,131072,0.1375
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,128,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,256,0.0950
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,512,0.1012
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,1024,0.0949
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,2048,0.0910
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,4096,0.0969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,8192,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,16384,0.1040
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,32768,0.1142
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,65536,0.1376
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,4,1,1,131072,0.1525
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,128,0.1009
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,256,0.0957
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,512,0.0969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,1024,0.0968
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,2048,0.1011
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,4096,0.1100
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,8192,0.1038
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,16384,0.1121
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,32768,0.1385
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,65536,0.1598
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,8,1,1,131072,0.1961
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,128,0.1015
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,256,0.1093
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,512,0.1096
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,1024,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,2048,0.1055
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,4096,0.1090
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,8192,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,16384,0.1368
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,32768,0.1665
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,65536,0.2033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,16,1,1,131072,0.2879
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,128,0.1072
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,256,0.1072
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,512,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,1024,0.1110
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,2048,0.1118
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,4096,0.1233
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,8192,0.1435
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,16384,0.1638
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,32768,0.2075
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,65536,0.2976
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,32,1,1,131072,0.4787
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,128,0.1155
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,256,0.1132
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,512,0.1141
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,1024,0.1156
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,2048,0.1235
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,4096,0.1461
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,8192,0.1667
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,16384,0.2140
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,32768,0.3051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,65536,0.4825
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,64,1,1,131072,0.8374
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,128,0.1193
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,256,0.1176
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,512,0.1193
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,1024,0.1298
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,2048,0.1466
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,4096,0.1729
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,8192,0.2176
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,16384,0.3052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,32768,0.4867
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,65536,0.8382
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,128,1,1,131072,1.5538
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,128,0.1318
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,256,0.1321
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,512,0.1462
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,1024,0.1579
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,2048,0.1821
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,4096,0.2285
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,16384,0.5013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,8192,0.3202
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,32768,0.8585
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,65536,1.5736
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,256,1,1,131072,3.0065
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,128,0.1608
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,256,0.1668
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,512,0.1811
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,2048,0.2528
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,1024,0.2072
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,4096,0.3465
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,16384,0.8928
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,8192,0.5265
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,32768,1.6180
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,512,1,1,65536,3.0759
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,128,0.2201
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,256,0.2283
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,512,0.2520
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,2048,0.3849
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,1024,0.2978
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,4096,0.5592
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,8192,0.8976
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,128,0.0829
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,256,0.0807
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,16384,1.5808
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,512,0.0908
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,1024,0.0890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,2048,0.0835
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,4096,0.0868
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,32768,2.9463
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,8192,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,16384,0.0965
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,32768,0.1010
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,65536,0.1012
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1,1,1,131072,0.1176
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,128,0.0928
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,256,0.0868
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,512,0.0953
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,1024,0.0885
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,4096,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,8192,0.0916
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,2048,0.0888
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,16384,0.1010
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,32768,0.1005
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,65536,0.1169
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,2,1,1,131072,0.1338
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,128,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,256,0.0968
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,512,0.0890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,1024,0.0907
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,2048,0.0934
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,4096,0.0951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,8192,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,16384,0.1013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,32768,0.1066
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,65536,0.1357
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,4,1,1,131072,0.1570
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,128,0.1013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,256,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,512,0.1033
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,1024,0.0969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,2048,0.1051
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,4096,0.0998
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,8192,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,16384,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,32768,0.1332
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,65536,0.1618
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,8,1,1,131072,0.2023
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,128,0.0993
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,256,0.0999
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,512,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,1024,0.1015
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,2048,0.1072
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,4096,0.1072
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,8192,0.1195
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,16384,0.1404
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,32768,0.1565
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,65536,0.2004
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,16,1,1,131072,0.2876
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,128,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,256,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,512,0.1079
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,1024,0.1129
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,2048,0.1095
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,4096,0.1217
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,8192,0.1357
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,16384,0.1616
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,32768,0.2044
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,65536,0.3011
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,32,1,1,131072,0.4758
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,128,0.1092
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,256,0.1093
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,512,0.1155
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,1024,0.1132
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,2048,0.1240
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,16384,0.2102
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,4096,0.1409
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,8192,0.1635
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,32768,0.3008
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,65536,0.4786
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,64,1,1,131072,0.8401
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,128,0.1160
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,256,0.1155
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,512,0.1200
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,1024,0.1238
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,2048,0.1449
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,4096,0.1648
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,8192,0.2109
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,16384,0.3020
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,32768,0.4803
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,65536,0.8363
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,128,1,1,131072,1.5526
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,128,0.1252
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,256,0.1257
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,2048,0.1743
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,512,0.1336
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,4096,0.2204
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,1024,0.1500
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,8192,0.3106
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,16384,0.4899
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,32768,0.8526
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,65536,1.5676
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,256,1,1,131072,3.0018
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,128,0.1439
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,256,0.1503
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,512,0.1625
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,2048,0.2347
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,1024,0.1887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,4096,0.3259
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,16384,0.8714
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,32768,1.6026
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,8192,0.5092
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,512,1,1,65536,3.0580
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,128,0.1907
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,256,0.1971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,2048,0.3534
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,512,0.2226
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,16384,1.5473
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,4096,0.5253
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,1024,0.2666
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,8192,0.8671
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,128,0.0807
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,256,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,1024,0.0824
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,2048,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,4096,0.0805
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,8192,0.0869
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,16384,0.0856
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,512,0.0886
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,32768,2.9141
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,32768,0.0999
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,65536,0.1025
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1,1,1,131072,0.1112
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,128,0.0839
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,256,0.0907
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,512,0.0846
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,1024,0.0846
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,2048,0.0927
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,4096,0.0887
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,8192,0.0970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,16384,0.0908
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,32768,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,65536,0.1138
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,2,1,1,131072,0.1313
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,128,0.0953
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,256,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,512,0.0969
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,1024,0.0908
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,2048,0.0907
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,4096,0.0998
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,8192,0.1023
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,16384,0.0975
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,32768,0.1074
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,65536,0.1330
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,4,1,1,131072,0.1500
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,128,0.0932
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,256,0.0928
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,512,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,1024,0.1042
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,2048,0.1013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,4096,0.1050
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,8192,0.0990
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,16384,0.1082
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,32768,0.1357
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,65536,0.1568
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,8,1,1,131072,0.2023
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,128,0.1072
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,256,0.1054
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,512,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,2048,0.1093
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,4096,0.1052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,8192,0.1135
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,16384,0.1338
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,1024,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,32768,0.1623
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,65536,0.2040
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,16,1,1,131072,0.2832
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,128,0.1030
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,256,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,512,0.1113
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,2048,0.1122
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,1024,0.1071
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,4096,0.1137
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,8192,0.1336
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,16384,0.1605
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,32768,0.2043
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,65536,0.2927
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,32,1,1,131072,0.4789
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,128,0.1112
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,256,0.1114
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,512,0.1108
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,1024,0.1143
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,2048,0.1176
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,4096,0.1380
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,8192,0.1612
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,16384,0.2068
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,32768,0.2997
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,65536,0.4793
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,64,1,1,131072,0.8339
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,128,0.1116
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,256,0.1134
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,512,0.1160
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,1024,0.1237
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,2048,0.1400
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,4096,0.1662
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,16384,0.2983
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,8192,0.2102
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,32768,0.4781
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,65536,0.8351
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,128,1,1,131072,1.5449
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,128,0.1194
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,256,0.1196
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,512,0.1279
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,1024,0.1444
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,2048,0.1708
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,4096,0.2164
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,8192,0.3080
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,32768,0.8434
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,16384,0.4842
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,65536,1.5622
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,256,1,1,131072,2.9937
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,128,0.1356
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,256,0.1438
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,512,0.1578
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,2048,0.2276
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,4096,0.3182
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,16384,0.8621
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,1024,0.1807
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,8192,0.4989
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,32768,1.5923
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,512,1,1,65536,3.0491
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,128,0.1765
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,256,0.1814
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,512,0.2047
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,2048,0.3354
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,4096,0.5050
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,16384,1.5291
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,1024,0.2479
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,8192,0.8471
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,128,0.0823
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,256,0.0785
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,512,0.0804
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,1024,0.0791
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,2048,0.0854
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,4096,0.0808
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,8192,0.0886
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,16384,0.0910
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,32768,0.0928
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,32768,2.8954
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,65536,0.0965
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1,1,1,131072,0.1148
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,128,0.0850
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,256,0.0893
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,512,0.0827
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,1024,0.0829
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,2048,0.0890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,4096,0.0828
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,16384,0.0930
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,32768,0.0937
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,65536,0.1073
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,131072,0.1341
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,2,1,1,8192,0.0932
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,128,0.0845
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,256,0.0897
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,512,0.0866
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,1024,0.0890
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,2048,0.0871
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,4096,0.0948
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,8192,0.0991
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,16384,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,32768,0.1013
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,65536,0.1236
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,4,1,1,131072,0.1453
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,256,0.0909
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,512,0.0929
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,1024,0.0992
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,128,0.0913
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,2048,0.0944
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,4096,0.0971
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,8192,0.1054
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,16384,0.1117
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,32768,0.1273
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,65536,0.1514
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,8,1,1,131072,0.1978
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,128,0.0975
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,256,0.1031
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,512,0.0968
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,1024,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,2048,0.1014
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,4096,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,8192,0.1156
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,16384,0.1316
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,32768,0.1565
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,65536,0.1974
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,16,1,1,131072,0.2808
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,128,0.1012
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,256,0.1032
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,512,0.1039
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,1024,0.1018
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,2048,0.1053
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,4096,0.1148
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,8192,0.1365
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,16384,0.1546
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,32768,0.2006
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,65536,0.2933
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,32,1,1,131072,0.4694
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,128,0.1029
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,256,0.1057
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,512,0.1115
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,1024,0.1072
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,2048,0.1138
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,4096,0.1338
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,16384,0.2052
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,8192,0.1569
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,32768,0.2958
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,131072,0.8281
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,64,1,1,65536,0.4765
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,128,0.1076
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,256,0.1113
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,512,0.1134
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,2048,0.1398
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,1024,0.1179
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,4096,0.1594
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,16384,0.2955
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,8192,0.2056
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,32768,0.4731
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,65536,0.8324
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,128,1,1,131072,1.5424
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,128,0.1173
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,256,0.1189
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,512,0.1239
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,1024,0.1429
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,2048,0.1647
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,4096,0.2122
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,16384,0.4816
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,8192,0.3017
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,32768,0.8409
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,65536,1.5563
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,256,1,1,131072,2.9896
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,128,0.1326
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,256,0.1358
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,512,0.1516
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,2048,0.2218
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,4096,0.3121
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,16384,0.8602
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,1024,0.1729
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,8192,0.4951
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,32768,1.5901
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,512,1,1,65536,3.0429
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,128,0.1688
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,256,0.1735
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,2048,0.3262
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,512,0.1976
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,4096,0.4970
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,1024,0.2386
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,8192,0.8390
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,16384,1.5173
VLLM,0.17.0,NVIDIA B200,mla_generation_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,32768,2.8838
