framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,16,1,0,0.1588
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,64,1,0,0.1751
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1024,1,0,0.5284
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,128,1,0,0.1872
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,256,1,0,0.2181
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,1536,1,0,0.7610
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,32,1,0,0.1624
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,512,1,0,0.3167
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,2048,1,0,1.0298
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,3072,1,0,1.6005
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,4096,1,0,2.3028
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,6144,1,0,3.6714
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,10240,1,0,7.6046
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,12288,1,0,9.9372
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,8192,1,0,5.7461
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,16384,1,0,15.3486
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,16,1,0,0.1671
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,32,1,0,0.1772
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,64,1,0,0.1901
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,256,1,0,0.3100
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,128,1,0,0.2196
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,512,1,0,0.5108
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1024,1,0,0.9576
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,1536,1,0,1.4742
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,2048,1,0,2.0462
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,3072,1,0,3.2510
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,1,32768,1,0,47.7907
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,4096,1,0,4.6210
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,6144,1,0,7.8448
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,8192,1,0,11.5160
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,10240,1,0,15.7545
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,16,1,0,0.1961
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,32,1,0,0.2093
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,12288,1,0,21.0422
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,64,1,0,0.2387
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,16384,1,0,31.0676
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,128,1,0,0.3260
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,256,1,0,0.5185
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,512,1,0,0.9394
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1024,1,0,1.9225
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,1536,1,0,2.8622
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,2048,1,0,4.1747
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,3072,1,0,6.6249
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,4096,1,0,9.4642
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,6144,1,0,15.6049
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,2,32768,1,0,94.5340
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,8192,1,0,23.0227
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,10240,1,0,31.6862
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,12288,1,0,40.8659
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,16,1,0,0.2175
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,32,1,0,0.2487
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,64,1,0,0.3309
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,128,1,0,0.5209
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,16384,1,0,62.9752
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,256,1,0,0.9337
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,512,1,0,1.8351
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1024,1,0,3.8806
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,1536,1,0,6.1133
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,2048,1,0,8.4058
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,3072,1,0,13.0282
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,4096,1,0,18.5824
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,6144,1,0,31.8023
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,16,1,0,0.2672
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,8192,1,0,46.2349
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,64,1,0,0.5387
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,32,1,0,0.3505
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,10240,1,0,62.7113
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,128,1,0,0.9335
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,256,1,0,1.8162
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,12288,1,0,79.6970
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,4,32768,1,0,186.2485
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,512,1,0,3.8176
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1024,1,0,7.8919
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,1536,1,0,11.8288
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,2048,1,0,16.7329
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,8,16384,1,0,122.2467
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,3072,1,0,27.0467
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,16,1,0,0.3885
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,32,1,0,0.5756
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,4096,1,0,38.9734
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,64,1,0,0.9746
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,128,1,0,1.8321
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,256,1,0,3.6914
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,512,1,0,7.6160
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,6144,1,0,62.0013
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1024,1,0,15.7110
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,1536,1,0,24.6254
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,16,8192,1,0,90.3009
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,16,1,0,0.6553
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,2048,1,0,33.9277
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,32,1,0,1.0501
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,64,1,0,1.9480
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,3072,1,0,52.8719
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,128,1,0,3.5943
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,256,1,0,7.6693
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,512,1,0,15.1658
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,32,4096,1,0,73.2326
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,16,1,0,1.2018
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1024,1,0,31.6841
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,32,1,0,2.1465
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,64,1,0,3.9684
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,1536,1,0,47.9753
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,128,1,0,7.5039
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,256,1,0,14.8086
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,16,1,0,2.4407
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,64,2048,1,0,65.6199
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,512,1,0,30.6528
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,32,1,0,4.3601
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,64,1,0,7.7272
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,128,1,0,15.1707
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,16,1,0,0.1401
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,32,1,0,0.1447
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,128,1024,1,0,61.8079
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,64,1,0,0.1517
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,256,1,0,30.6999
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,128,1,0,0.1623
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,256,1,0,0.1836
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,512,1,0,0.2368
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1024,1,0,0.3685
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,1536,1,0,0.4946
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,2048,1,0,0.6537
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,3072,1,0,0.9354
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,128,256,512,1,0,60.2787
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,4096,1,0,1.3085
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,6144,1,0,2.1128
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,8192,1,0,3.1166
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,10240,1,0,4.2056
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,12288,1,0,5.5329
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,16384,1,0,8.6358
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,16,1,0,0.1447
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,32,1,0,0.1531
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,64,1,0,0.1589
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,1,32768,1,0,25.6516
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,128,1,0,0.1788
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,256,1,0,0.2340
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,512,1,0,0.3573
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1024,1,0,0.6181
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,1536,1,0,0.8658
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,2048,1,0,1.1995
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,3072,1,0,1.8146
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,4096,1,0,2.5768
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,6144,1,0,4.2849
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,8192,1,0,6.2351
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,10240,1,0,8.3644
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,12288,1,0,11.0374
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,16384,1,0,17.0439
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,16,1,0,0.1555
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,32,1,0,0.1631
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,64,1,0,0.1828
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,128,1,0,0.2345
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,256,1,0,0.3501
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,512,1,0,0.5987
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1024,1,0,1.1253
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,2,32768,1,0,49.6936
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,1536,1,0,1.6863
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,2048,1,0,2.3076
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,3072,1,0,3.6728
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,4096,1,0,5.2324
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,6144,1,0,8.6429
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,8192,1,0,12.5212
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,16,1,0,0.1689
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,10240,1,0,17.2395
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,12288,1,0,22.4327
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,32,1,0,0.1885
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,64,1,0,0.2405
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,16384,1,0,33.8989
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,128,1,0,0.3520
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,256,1,0,0.5892
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,512,1,0,1.0918
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1024,1,0,2.2456
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,1536,1,0,3.4377
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,2048,1,0,4.7141
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,3072,1,0,7.4208
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,4096,1,0,10.6560
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,6144,1,0,17.3618
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,8192,1,0,25.7050
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,4,32768,1,0,100.3728
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,16,1,0,0.1970
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,32,1,0,0.2489
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,10240,1,0,35.1445
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,64,1,0,0.3584
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,12288,1,0,45.3431
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,128,1,0,0.5881
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,256,1,0,1.0711
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,512,1,0,2.1619
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1024,1,0,4.5369
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,1536,1,0,7.0411
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,8,16384,1,0,68.4975
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,2048,1,0,9.7657
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,3072,1,0,15.0154
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,4096,1,0,21.1915
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,16,1,0,0.2701
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,32,1,0,0.3779
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,64,1,0,0.6084
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,6144,1,0,36.4122
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,128,1,0,1.0813
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,256,1,0,2.1661
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,512,1,0,4.4157
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,16,8192,1,0,51.6874
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1024,1,0,9.1091
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,1536,1,0,13.9404
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,16,1,0,0.4178
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,2048,1,0,20.2238
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,32,1,0,0.6448
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,64,1,0,1.1054
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,3072,1,0,30.9794
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,128,1,0,2.1216
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,256,1,0,4.2573
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,32,4096,1,0,43.8649
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,512,1,0,8.9772
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1024,1,0,18.4270
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,16,1,0,0.7225
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,32,1,0,1.1866
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,1536,1,0,28.5539
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,64,1,0,2.2464
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,128,1,0,4.4007
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,64,2048,1,0,40.1917
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,256,1,0,8.7547
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,512,1,0,17.8867
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,16,1,0,1.3398
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,32,1,0,2.3884
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,64,1,0,4.6323
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,128,1,0,8.7892
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,128,1024,1,0,37.8889
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,16,1,0,0.1254
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,32,1,0,0.1289
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,256,1,0,17.6182
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,64,1,0,0.1335
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,128,1,0,0.1398
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,256,1,0,0.1533
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,512,1,0,0.1857
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,64,256,512,1,0,36.0066
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1024,1,0,0.2548
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,1536,1,0,0.3330
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,2048,1,0,0.4183
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,3072,1,0,0.5770
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,4096,1,0,0.7754
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,6144,1,0,1.2240
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,8192,1,0,1.7018
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,10240,1,0,2.3295
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,12288,1,0,2.9267
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,16384,1,0,4.6614
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,1,32768,1,0,12.9640
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,16,1,0,0.1299
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,32,1,0,0.1321
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,64,1,0,0.1402
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,128,1,0,0.1539
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,256,1,0,0.1846
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1024,1,0,0.3973
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,512,1,0,0.2493
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,1536,1,0,0.5312
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,2048,1,0,0.7051
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,3072,1,0,1.0786
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,4096,1,0,1.4608
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,6144,1,0,2.3550
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,8192,1,0,3.5024
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,10240,1,0,4.6613
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,12288,1,0,5.9611
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,16384,1,0,8.9621
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,16,1,0,0.1319
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,32,1,0,0.1398
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,64,1,0,0.1510
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,128,1,0,0.1824
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,2,32768,1,0,27.2134
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,256,1,0,0.2462
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1024,1,0,0.6676
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,512,1,0,0.3840
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,1536,1,0,0.9999
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,2048,1,0,1.3390
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,3072,1,0,2.0722
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,4096,1,0,3.0213
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,6144,1,0,4.8188
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,8192,1,0,6.9465
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,10240,1,0,9.3940
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,12288,1,0,11.9447
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,16384,1,0,19.1478
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,16,1,0,0.1437
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,32,1,0,0.1533
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,64,1,0,0.1836
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,128,1,0,0.2453
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,256,1,0,0.3785
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,512,1,0,0.6512
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1024,1,0,1.2833
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,1536,1,0,1.9876
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,4,32768,1,0,53.3528
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,2048,1,0,2.7195
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,3072,1,0,4.3156
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,4096,1,0,6.0589
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,6144,1,0,9.6200
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,8192,1,0,14.1403
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,16,1,0,0.1591
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,10240,1,0,19.8984
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,32,1,0,0.1863
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,12288,1,0,25.0813
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,64,1,0,0.2514
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,128,1,0,0.3794
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,8,16384,1,0,37.0423
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,256,1,0,0.6418
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,512,1,0,1.2533
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1024,1,0,2.5385
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,1536,1,0,4.0541
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,2048,1,0,5.4584
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,3072,1,0,8.6662
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,4096,1,0,12.2642
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,16,1,0,0.1998
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,32,1,0,0.2589
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,6144,1,0,20.0502
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,64,1,0,0.3864
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,128,1,0,0.6482
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,16,8192,1,0,28.6648
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,256,1,0,1.2407
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,512,1,0,2.5136
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1024,1,0,5.1675
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,1536,1,0,7.9609
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,2048,1,0,11.1053
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,16,1,0,0.2791
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,32,1,0,0.4068
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,3072,1,0,17.4212
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,64,1,0,0.6597
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,32,4096,1,0,24.9489
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,128,1,0,1.2338
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,256,1,0,2.4482
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,512,1,0,5.0596
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1024,1,0,10.5305
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,16,1,0,0.4458
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,1536,1,0,16.3134
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,32,1,0,0.6984
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,64,2048,1,0,22.5736
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,64,1,0,1.2749
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,128,1,0,2.5219
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,256,1,0,5.0206
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,512,1,0,10.2380
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,16,1,0,0.7763
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,32,1,0,1.3587
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,64,1,0,2.5591
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,128,1,0,5.0671
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,128,1024,1,0,21.9807
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,16,1,0,0.1044
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,256,1,0,10.1007
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,32,1,0,0.1097
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,64,1,0,0.1185
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,128,1,0,0.1217
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,256,1,0,0.1400
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,32,256,512,1,0,20.6425
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,512,1,0,0.1615
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1024,1,0,0.2057
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,1536,1,0,0.2531
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,2048,1,0,0.2934
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,3072,1,0,0.4045
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,4096,1,0,0.5292
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,6144,1,0,0.7942
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,8192,1,0,1.0735
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,10240,1,0,1.4122
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,12288,1,0,1.7665
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,16384,1,0,2.5544
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,1,32768,1,0,7.2636
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,16,1,0,0.1074
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,32,1,0,0.1159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,64,1,0,0.1257
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,128,1,0,0.1367
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,256,1,0,0.1547
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,512,1,0,0.2006
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1024,1,0,0.2802
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,1536,1,0,0.3792
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,2048,1,0,0.4850
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,3072,1,0,0.7092
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,4096,1,0,0.9383
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,6144,1,0,1.4813
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,8192,1,0,2.0953
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,10240,1,0,2.7531
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,12288,1,0,3.6011
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,16384,1,0,5.3266
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,16,1,0,0.1194
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,2,32768,1,0,14.5042
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,32,1,0,0.1199
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,64,1,0,0.1353
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,128,1,0,0.1542
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,256,1,0,0.1949
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,512,1,0,0.2736
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1024,1,0,0.4614
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,1536,1,0,0.6640
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,2048,1,0,0.8689
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,3072,1,0,1.3385
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,4096,1,0,1.8494
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,6144,1,0,2.9866
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,8192,1,0,4.3020
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,10240,1,0,5.6713
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,12288,1,0,7.3427
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,16,1,0,0.1214
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,16384,1,0,10.3360
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,32,1,0,0.1299
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,64,1,0,0.1524
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,128,1,0,0.1943
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,256,1,0,0.2706
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,4,32768,1,0,30.0819
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,512,1,0,0.4532
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1024,1,0,0.8353
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,1536,1,0,1.2533
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,2048,1,0,1.7079
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,3072,1,0,2.6549
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,4096,1,0,3.7551
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,6144,1,0,6.1131
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,8192,1,0,8.5217
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,10240,1,0,11.6454
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,16,1,0,0.1378
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,12288,1,0,14.6935
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,32,1,0,0.1566
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,8,16384,1,0,21.1056
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,64,1,0,0.1976
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,128,1,0,0.2716
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,256,1,0,0.4440
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,512,1,0,0.8192
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1024,1,0,1.6696
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,1536,1,0,2.5242
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,2048,1,0,3.4613
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,3072,1,0,5.5365
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,4096,1,0,7.5256
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,6144,1,0,12.3001
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,16,1,0,0.1614
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,32,1,0,0.2030
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,64,1,0,0.2764
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,16,8192,1,0,17.3677
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,128,1,0,0.4487
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,256,1,0,0.8092
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,512,1,0,1.6337
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1024,1,0,3.3466
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,1536,1,0,5.1568
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,2048,1,0,6.9446
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,3072,1,0,11.4021
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,16,1,0,0.2118
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,32,4096,1,0,15.2911
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,32,1,0,0.2857
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,64,1,0,0.4536
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,128,1,0,0.8122
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,256,1,0,1.6084
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,512,1,0,3.3775
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1024,1,0,6.7610
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,1536,1,0,10.4678
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,16,1,0,0.3046
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,32,1,0,0.4731
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,64,2048,1,0,14.4763
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,64,1,0,0.8301
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,128,1,0,1.6358
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,256,1,0,3.2913
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,512,1,0,6.6628
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,16,1,0,0.5132
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,128,1024,1,0,13.7973
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,32,1,0,0.8662
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,64,1,0,1.6830
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,128,1,0,3.3574
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,256,1,0,6.5788
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,16,1,0,0.1015
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,32,1,0,0.1074
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,64,1,0,0.1111
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,16,256,512,1,0,13.6780
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,128,1,0,0.1145
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,256,1,0,0.1283
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,512,1,0,0.1445
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1024,1,0,0.1835
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,1536,1,0,0.2149
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,2048,1,0,0.2514
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,4096,1,0,0.3990
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,3072,1,0,0.3234
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,6144,1,0,0.5821
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,8192,1,0,0.7870
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,10240,1,0,1.0023
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,12288,1,0,1.2288
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,16384,1,0,1.7418
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,1,32768,1,0,4.5390
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,16,1,0,0.1033
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,32,1,0,0.1115
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,64,1,0,0.1155
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,128,1,0,0.1259
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,256,1,0,0.1426
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,512,1,0,0.1753
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1024,1,0,0.2369
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,1536,1,0,0.3039
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,2048,1,0,0.3717
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,3072,1,0,0.5344
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,4096,1,0,0.7027
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,6144,1,0,1.0597
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,8192,1,0,1.4704
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,10240,1,0,1.8747
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,12288,1,0,2.3155
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,32,1,0,0.1159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,16,1,0,0.1115
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,16384,1,0,3.4374
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,2,32768,1,0,8.9346
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,64,1,0,0.1281
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,128,1,0,0.1422
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,256,1,0,0.1732
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,512,1,0,0.2315
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1024,1,0,0.3584
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,1536,1,0,0.5094
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,2048,1,0,0.6560
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,3072,1,0,0.9791
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,4096,1,0,1.3244
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,6144,1,0,2.0514
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,8192,1,0,2.8529
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,10240,1,0,3.7704
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,12288,1,0,4.8368
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,16384,1,0,6.9808
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,16,1,0,0.1139
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,32,1,0,0.1282
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,4,32768,1,0,18.3906
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,64,1,0,0.1421
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,128,1,0,0.1710
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,256,1,0,0.2290
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,512,1,0,0.3510
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1024,1,0,0.6358
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,1536,1,0,0.9308
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,2048,1,0,1.2648
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,3072,1,0,1.8894
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,4096,1,0,2.6171
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,6144,1,0,4.1472
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,8192,1,0,6.0328
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,10240,1,0,7.7788
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,12288,1,0,9.6998
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,16,1,0,0.1296
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,32,1,0,0.1338
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,8,16384,1,0,13.7556
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,64,1,0,0.1710
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,128,1,0,0.2261
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,256,1,0,0.3476
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,512,1,0,0.6232
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1536,1,0,1.8194
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,1024,1,0,1.2199
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,2048,1,0,2.4520
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,3072,1,0,3.8872
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,4096,1,0,5.3941
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,6144,1,0,8.6706
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,16,8192,1,0,12.0024
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,16,1,0,0.1450
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,32,1,0,0.1742
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,64,1,0,0.2278
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,128,1,0,0.3503
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,256,1,0,0.6140
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,512,1,0,1.2114
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1024,1,0,2.4394
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,1536,1,0,3.7320
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,16,1,0,0.1791
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,2048,1,0,5.0920
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,3072,1,0,7.9904
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,32,4096,1,0,10.9196
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,32,1,0,0.2327
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,64,1,0,0.3574
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,128,1,0,0.6192
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,256,1,0,1.1952
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,512,1,0,2.3671
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1024,1,0,4.8875
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,1536,1,0,7.6238
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,16,1,0,0.2426
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,64,2048,1,0,10.1551
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,32,1,0,0.3629
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,64,1,0,0.6246
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,128,1,0,1.1999
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,256,1,0,2.3490
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,512,1,0,4.8416
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,16,1,0,0.3839
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,32,1,0,0.6472
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,128,1024,1,0,9.8478
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,64,1,0,1.2135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,128,1,0,2.3863
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,256,1,0,4.8237
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,16,1,0,0.1053
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,32,1,0,0.1037
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,64,1,0,0.1095
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,8,256,512,1,0,10.1450
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,128,1,0,0.1136
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,256,1,0,0.1240
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,512,1,0,0.1381
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1024,1,0,0.1737
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,1536,1,0,0.1971
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,2048,1,0,0.2286
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,3072,1,0,0.2909
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,4096,1,0,0.3620
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,6144,1,0,0.4987
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,8192,1,0,0.6345
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,10240,1,0,0.7826
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,12288,1,0,0.9627
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,16384,1,0,1.3361
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,1,32768,1,0,3.0941
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,16,1,0,0.1055
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,32,1,0,0.1096
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,64,1,0,0.1135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,128,1,0,0.1217
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,256,1,0,0.1383
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,512,1,0,0.1668
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1024,1,0,0.2159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,2048,1,0,0.3360
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,1536,1,0,0.2719
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,3072,1,0,0.4605
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,4096,1,0,0.5806
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,6144,1,0,0.8601
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,8192,1,0,1.1829
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,12288,1,0,1.7797
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,10240,1,0,1.4784
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,16384,1,0,2.6061
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,16,1,0,0.1117
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,2,32768,1,0,6.2028
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,32,1,0,0.1135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,64,1,0,0.1199
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,128,1,0,0.1322
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,256,1,0,0.1639
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,512,1,0,0.2097
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1536,1,0,0.4350
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,1024,1,0,0.3227
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,2048,1,0,0.5537
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,3072,1,0,0.8102
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,4096,1,0,1.1004
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,6144,1,0,1.6365
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,8192,1,0,2.1998
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,12288,1,0,3.6442
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,10240,1,0,2.8629
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,16384,1,0,5.0791
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,16,1,0,0.1140
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,32,1,0,0.1177
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,4,32768,1,0,12.3329
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,64,1,0,0.1366
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,128,1,0,0.1605
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,256,1,0,0.2078
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,512,1,0,0.3145
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1024,1,0,0.5404
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,1536,1,0,0.7887
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,2048,1,0,1.0568
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,3072,1,0,1.5380
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,4096,1,0,2.0645
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,6144,1,0,3.2793
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,10240,1,0,5.9005
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,8192,1,0,4.5872
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,12288,1,0,7.2372
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,16,1,0,0.1218
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,32,1,0,0.1359
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,8,16384,1,0,10.2639
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,64,1,0,0.1582
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,128,1,0,0.2032
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,256,1,0,0.3122
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,512,1,0,0.5380
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1024,1,0,1.0249
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,1536,1,0,1.4964
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,2048,1,0,1.9942
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,3072,1,0,3.1026
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,4096,1,0,4.2313
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,6144,1,0,6.7856
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,16,1,0,0.1371
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,32,1,0,0.1607
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,16,8192,1,0,9.2068
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,64,1,0,0.2042
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,128,1,0,0.3107
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,256,1,0,0.5304
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,512,1,0,1.0254
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1024,1,0,1.9903
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,1536,1,0,3.0009
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,2048,1,0,4.0620
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,3072,1,0,6.3737
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,16,1,0,0.1642
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,32,4096,1,0,8.6141
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,32,1,0,0.2077
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,64,1,0,0.3097
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,128,1,0,0.5305
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,256,1,0,1.0044
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,512,1,0,1.9535
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1024,1,0,4.0134
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,1536,1,0,6.2577
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,16,1,0,0.2120
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,32,1,0,0.3180
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,64,2048,1,0,8.4107
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,64,1,0,0.5375
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,128,1,0,1.0103
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,256,1,0,1.9471
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,512,1,0,4.0320
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,16,1,0,0.3249
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,128,1024,1,0,8.2815
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,64,1,0,1.0152
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,32,1,0,0.5438
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,128,1,0,1.9455
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,256,1,0,3.9268
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,16,1,0,0.1014
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,32,1,0,0.1044
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,4,256,512,1,0,8.2175
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,64,1,0,0.1114
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,128,1,0,0.1135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,256,1,0,0.1241
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,512,1,0,0.1362
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1024,1,0,0.1697
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,1536,1,0,0.1934
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,2048,1,0,0.2224
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,3072,1,0,0.2824
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,4096,1,0,0.3425
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,6144,1,0,0.4646
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,8192,1,0,0.5927
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,10240,1,0,0.7197
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,12288,1,0,0.8600
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,16384,1,0,1.1309
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,1,32768,1,0,2.4726
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,16,1,0,0.1074
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,32,1,0,0.1084
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,64,1,0,0.1138
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,128,1,0,0.1179
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,256,1,0,0.1382
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,512,1,0,0.1634
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1024,1,0,0.2096
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,1536,1,0,0.2633
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,2048,1,0,0.3148
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,3072,1,0,0.4249
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,4096,1,0,0.5407
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,6144,1,0,0.7734
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,8192,1,0,1.0355
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,10240,1,0,1.2636
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,12288,1,0,1.5529
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,16384,1,0,2.1171
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,2,32768,1,0,4.7758
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,16,1,0,0.1098
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,32,1,0,0.1136
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,64,1,0,0.1200
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,128,1,0,0.1355
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,256,1,0,0.1591
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,512,1,0,0.2039
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1024,1,0,0.3025
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,1536,1,0,0.4062
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,2048,1,0,0.5152
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,3072,1,0,0.7331
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,4096,1,0,0.9781
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,6144,1,0,1.4604
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,8192,1,0,1.9427
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,10240,1,0,2.4785
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,12288,1,0,2.9991
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,16384,1,0,4.1004
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,16,1,0,0.1118
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,4,32768,1,0,9.5555
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,32,1,0,0.1204
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,64,1,0,0.1341
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,128,1,0,0.1577
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,256,1,0,0.1996
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,512,1,0,0.2954
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1024,1,0,0.5007
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,1536,1,0,0.7180
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,2048,1,0,0.9464
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,3072,1,0,1.4082
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,4096,1,0,1.8613
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,6144,1,0,2.8626
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,8192,1,0,3.8473
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,10240,1,0,5.0113
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,12288,1,0,6.1360
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,16,1,0,0.1177
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,32,1,0,0.1330
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,8,16384,1,0,8.4646
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,64,1,0,0.1569
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,128,1,0,0.1974
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,256,1,0,0.2930
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,512,1,0,0.4959
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1024,1,0,0.9322
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,1536,1,0,1.3820
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,2048,1,0,1.8192
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,3072,1,0,2.7770
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,4096,1,0,3.7396
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,6144,1,0,5.8138
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,16,1,0,0.1259
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,32,1,0,0.1559
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,16,8192,1,0,7.9546
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,128,1,0,0.2897
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,64,1,0,0.1944
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,256,1,0,0.4920
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,512,1,0,0.9232
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1024,1,0,1.7907
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,1536,1,0,2.7207
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,2048,1,0,3.6498
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,3072,1,0,5.7960
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,32,4096,1,0,7.6757
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,16,1,0,0.1571
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,32,1,0,0.1973
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,64,1,0,0.2911
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,128,1,0,0.4883
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,256,1,0,0.9243
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,512,1,0,1.7805
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1024,1,0,3.6637
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,1536,1,0,5.5816
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,64,2048,1,0,7.5261
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,32,1,0,0.2929
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,64,1,0,0.4922
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,16,1,0,0.1993
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,128,1,0,0.9226
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,256,1,0,1.8015
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,512,1,0,3.5876
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,16,1,0,0.2970
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,128,1024,1,0,7.4503
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,32,1,0,0.4979
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,64,1,0,0.9344
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,128,1,0,1.7764
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,256,1,0,3.6644
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,16,1,0,0.0973
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,64,1,0,0.1059
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,32,1,0,0.1032
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,128,1,0,0.1096
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,2,256,512,1,0,7.4671
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,256,1,0,0.1191
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,512,1,0,0.1360
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1024,1,0,0.1660
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,1536,1,0,0.1898
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,2048,1,0,0.2167
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,3072,1,0,0.2772
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,4096,1,0,0.3317
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,6144,1,0,0.4472
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,8192,1,0,0.5687
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,10240,1,0,0.6994
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,12288,1,0,0.8318
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,16384,1,0,1.0937
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,16,1,0,0.0995
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,1,32768,1,0,2.1218
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,32,1,0,0.1053
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,64,1,0,0.1094
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,128,1,0,0.1175
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,256,1,0,0.1280
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,512,1,0,0.1581
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1024,1,0,0.2039
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,1536,1,0,0.2592
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,2048,1,0,0.3039
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,3072,1,0,0.4062
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,4096,1,0,0.5155
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,6144,1,0,0.7545
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,8192,1,0,0.9888
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,10240,1,0,1.2006
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,12288,1,0,1.4439
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,16384,1,0,1.8951
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,2,32768,1,0,4.0151
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,16,1,0,0.1034
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,32,1,0,0.1095
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,64,1,0,0.1177
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,128,1,0,0.1254
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,256,1,0,0.1548
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,512,1,0,0.1975
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1024,1,0,0.2929
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,1536,1,0,0.3857
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,2048,1,0,0.4895
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,3072,1,0,0.7160
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,4096,1,0,0.9327
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,6144,1,0,1.3631
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,8192,1,0,1.8084
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,10240,1,0,2.2492
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,12288,1,0,2.7468
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,16384,1,0,3.6895
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,16,1,0,0.1117
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,4,32768,1,0,8.1249
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,32,1,0,0.1159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,64,1,0,0.1239
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,128,1,0,0.1523
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,256,1,0,0.1937
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,512,1,0,0.2839
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1024,1,0,0.4779
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,1536,1,0,0.6905
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,2048,1,0,0.9154
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,3072,1,0,1.3195
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,4096,1,0,1.7474
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,6144,1,0,2.6430
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,8192,1,0,3.5550
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,10240,1,0,4.5723
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,12288,1,0,5.6111
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,8,16384,1,0,7.5735
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,16,1,0,0.1140
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,32,1,0,0.1239
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,64,1,0,0.1505
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,128,1,0,0.1914
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,256,1,0,0.2819
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,512,1,0,0.4694
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1024,1,0,0.9026
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,1536,1,0,1.2988
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,2048,1,0,1.7082
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,3072,1,0,2.6136
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,4096,1,0,3.4327
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,6144,1,0,5.3692
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,16,1,0,0.1303
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,32,1,0,0.1522
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,16,8192,1,0,7.2697
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,64,1,0,0.1900
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,128,1,0,0.2778
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,256,1,0,0.4660
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,512,1,0,0.8930
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1024,1,0,1.6904
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,1536,1,0,2.6036
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,2048,1,0,3.4416
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,3072,1,0,5.2833
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,16,1,0,0.1505
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,32,4096,1,0,7.1292
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,32,1,0,0.1905
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,64,1,0,0.2795
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,128,1,0,0.4662
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,256,1,0,0.8876
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,512,1,0,1.6993
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1024,1,0,3.3807
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,1536,1,0,5.1772
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,64,2048,1,0,6.9929
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,16,1,0,0.1921
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,32,1,0,0.2795
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,64,1,0,0.4634
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,128,1,0,0.8891
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,256,1,0,1.6806
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,512,1,0,3.4023
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,16,1,0,0.2825
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,128,1024,1,0,6.9181
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,32,1,0,0.4658
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,64,1,0,0.8961
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,128,1,0,1.6969
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,256,1,0,3.4267
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,16,1,0,0.1751
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,32,1,0,0.1782
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,float16,1,256,512,1,0,6.9540
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,64,1,0,0.1890
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,128,1,0,0.1998
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,256,1,0,0.2324
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,512,1,0,0.3309
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,1024,1,0,0.5458
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,1536,1,0,0.7761
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,2048,1,0,1.0350
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,3072,1,0,1.6152
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,4096,1,0,2.3030
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,6144,1,0,3.8381
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,8192,1,0,5.8929
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,10240,1,0,7.5068
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,12288,1,0,10.0191
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,16384,1,0,15.4589
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,16,1,0,0.1825
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,32,1,0,0.1915
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,64,1,0,0.2025
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,128,1,0,0.2319
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,256,1,0,0.3258
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,512,1,0,0.5268
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,1,32768,1,0,47.6548
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,1024,1,0,0.9766
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,1536,1,0,1.4720
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,2048,1,0,2.0208
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,3072,1,0,3.2164
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,4096,1,0,4.6893
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,6144,1,0,7.8634
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,8192,1,0,11.3030
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,10240,1,0,15.6799
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,12288,1,0,20.2697
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,16,1,0,0.1985
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,32,1,0,0.2087
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,16384,1,0,31.1981
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,64,1,0,0.2399
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,128,1,0,0.3248
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,256,1,0,0.5198
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,512,1,0,0.9505
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,1024,1,0,1.9023
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,1536,1,0,2.8746
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,2048,1,0,4.1287
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,3072,1,0,6.6533
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,4096,1,0,9.2705
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,6144,1,0,16.1018
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,2,32768,1,0,94.8904
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,8192,1,0,23.2629
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,16,1,0,0.2186
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,10240,1,0,31.7918
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,32,1,0,0.2447
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,12288,1,0,41.2965
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,64,1,0,0.3338
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,128,1,0,0.5203
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,256,1,0,0.9290
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,512,1,0,1.8746
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,16384,1,0,62.7973
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,1024,1,0,3.8584
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,1536,1,0,6.1262
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,2048,1,0,8.6179
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,3072,1,0,13.2728
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,4096,1,0,19.3881
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,6144,1,0,32.1442
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,16,1,0,0.2645
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,8192,1,0,46.7175
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,32,1,0,0.3513
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,10240,1,0,63.2281
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,64,1,0,0.5362
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,128,1,0,0.9344
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,256,1,0,1.8295
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,12288,1,0,79.2817
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,512,1,0,3.7498
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,4,32768,1,0,188.3338
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,1024,1,0,7.9778
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,1536,1,0,12.1403
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,2048,1,0,17.0687
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,16,1,0,0.3887
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,3072,1,0,27.4377
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,8,16384,1,0,124.6246
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,32,1,0,0.5773
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,4096,1,0,39.7038
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,64,1,0,0.9709
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,128,1,0,1.8364
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,256,1,0,3.7532
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,6144,1,0,61.0565
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,512,1,0,7.7180
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,1024,1,0,15.6676
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,16,8192,1,0,91.0211
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,1536,1,0,25.1096
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,16,1,0,0.6540
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,32,1,0,1.0518
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,2048,1,0,34.1256
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,64,1,0,1.9367
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,3072,1,0,52.3643
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,128,1,0,3.6772
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,256,1,0,7.5340
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,512,1,0,15.1441
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,32,4096,1,0,73.9031
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,128,16,1,0,1.2011
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,1024,1,0,31.9832
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,128,32,1,0,2.1256
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,128,64,1,0,3.9430
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,128,128,1,0,7.7277
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,1536,1,0,49.4398
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,128,256,1,0,14.7765
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,128,512,1,0,31.1591
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,256,16,1,0,2.4766
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,64,2048,1,0,66.4809
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,256,32,1,0,4.2881
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,256,64,1,0,8.0238
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,256,128,1,0,15.0765
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,16,1,0,0.1405
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,128,1024,1,0,60.2925
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,32,1,0,0.1442
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,64,1,0,0.1529
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,128,1,0,0.1605
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,256,256,1,0,30.3384
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,256,1,0,0.1831
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,512,1,0,0.2381
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,1024,1,0,0.3697
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,1536,1,0,0.4952
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,2048,1,0,0.6523
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,3072,1,0,0.9399
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,4096,1,0,1.3099
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,128,256,512,1,0,60.3807
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,6144,1,0,2.0815
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,8192,1,0,3.1583
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,10240,1,0,4.3373
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,12288,1,0,5.4363
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,16,1,0,0.1461
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,32,1,0,0.1499
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,16384,1,0,8.3553
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,64,1,0,0.1598
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,128,1,0,0.1789
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,1,32768,1,0,24.8709
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,256,1,0,0.2343
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,512,1,0,0.3575
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,1024,1,0,0.6162
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,1536,1,0,0.8620
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,2048,1,0,1.1778
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,3072,1,0,1.8424
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,4096,1,0,2.5965
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,6144,1,0,4.2948
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,8192,1,0,6.2757
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,10240,1,0,8.5047
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,12288,1,0,10.9942
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,16384,1,0,16.6887
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,16,1,0,0.1531
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,32,1,0,0.1653
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,64,1,0,0.1838
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,128,1,0,0.2350
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,256,1,0,0.3516
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,512,1,0,0.5976
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,1024,1,0,1.1149
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,1536,1,0,1.6951
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,2,32768,1,0,50.9096
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,2048,1,0,2.3512
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,3072,1,0,3.8120
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,4096,1,0,5.2228
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,6144,1,0,8.9933
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,8192,1,0,12.6219
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,10240,1,0,17.1002
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,16,1,0,0.1690
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,32,1,0,0.1886
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,12288,1,0,22.2764
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,64,1,0,0.2398
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,128,1,0,0.3510
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,16384,1,0,33.6621
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,256,1,0,0.5912
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,512,1,0,1.0855
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,1024,1,0,2.2195
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,1536,1,0,3.4919
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,2048,1,0,4.8177
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,3072,1,0,7.4977
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,4096,1,0,10.7921
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,6144,1,0,17.3034
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,8192,1,0,25.3565
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,4,32768,1,0,101.2740
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,16,1,0,0.1995
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,10240,1,0,34.8562
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,32,1,0,0.2511
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,64,1,0,0.3597
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,128,1,0,0.5928
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,12288,1,0,44.5733
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,256,1,0,1.0883
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,512,1,0,2.1500
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,1024,1,0,4.5128
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,1536,1,0,6.8564
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,8,16384,1,0,69.0990
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,2048,1,0,9.6167
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,3072,1,0,15.1723
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,16,1,0,0.2706
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,4096,1,0,21.9092
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,32,1,0,0.3777
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,64,1,0,0.6080
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,6144,1,0,36.2337
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,128,1,0,1.0713
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,256,1,0,2.1300
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,512,1,0,4.3441
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,16,8192,1,0,52.1174
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,1024,1,0,9.2060
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,1536,1,0,13.8441
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,2048,1,0,20.3740
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,16,1,0,0.4164
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,32,1,0,0.6467
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,64,1,0,1.1158
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,128,1,0,2.1376
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,3072,1,0,32.0126
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,256,1,0,4.3206
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,32,4096,1,0,43.7996
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,512,1,0,8.7671
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,128,16,1,0,0.7298
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,1024,1,0,18.6784
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,128,32,1,0,1.1978
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,128,64,1,0,2.2016
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,1536,1,0,28.6017
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,128,128,1,0,4.2768
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,64,2048,1,0,40.2479
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,128,256,1,0,8.6622
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,128,512,1,0,18.1278
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,256,16,1,0,1.3438
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,256,32,1,0,2.4060
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,256,64,1,0,4.5859
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,256,128,1,0,8.9670
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,128,1024,1,0,36.9860
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,16,1,0,0.1238
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,32,1,0,0.1289
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,256,256,1,0,17.8583
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,64,1,0,0.1326
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,128,1,0,0.1400
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,256,1,0,0.1523
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,64,256,512,1,0,36.8977
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,512,1,0,0.1883
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,1024,1,0,0.2555
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,1536,1,0,0.3341
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,2048,1,0,0.4214
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,3072,1,0,0.5761
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,4096,1,0,0.7805
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,6144,1,0,1.2188
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,8192,1,0,1.7212
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,10240,1,0,2.2660
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,12288,1,0,3.0436
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,16384,1,0,4.5473
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,16,1,0,0.1298
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,1,32768,1,0,13.0457
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,32,1,0,0.1343
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,64,1,0,0.1404
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,128,1,0,0.1500
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,256,1,0,0.1839
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,512,1,0,0.2496
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,1024,1,0,0.3969
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,2048,1,0,0.7031
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,1536,1,0,0.5327
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,3072,1,0,1.0781
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,4096,1,0,1.4644
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,6144,1,0,2.5065
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,8192,1,0,3.4605
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,10240,1,0,4.8078
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,12288,1,0,5.9343
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,16,1,0,0.1354
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,16384,1,0,8.9208
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,32,1,0,0.1393
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,64,1,0,0.1489
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,128,1,0,0.1821
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,2,32768,1,0,26.8917
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,256,1,0,0.2454
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,512,1,0,0.3854
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,1024,1,0,0.6684
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,1536,1,0,1.0107
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,2048,1,0,1.3342
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,3072,1,0,2.1451
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,4096,1,0,2.9204
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,6144,1,0,4.8877
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,8192,1,0,6.8930
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,10240,1,0,9.3267
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,12288,1,0,11.8581
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,16,1,0,0.1420
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,16384,1,0,18.3710
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,32,1,0,0.1553
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,64,1,0,0.1831
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,256,1,0,0.3783
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,512,1,0,0.6502
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,1024,1,0,1.2814
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,1536,1,0,1.9847
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,128,1,0,0.2462
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,3072,1,0,4.2435
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,4,32768,1,0,52.4957
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,2048,1,0,2.6742
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,4096,1,0,6.0237
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,6144,1,0,9.5846
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,8192,1,0,14.1710
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,16,1,0,0.1598
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,10240,1,0,18.9839
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,32,1,0,0.1895
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,12288,1,0,24.6126
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,64,1,0,0.2528
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,128,1,0,0.3806
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,256,1,0,0.6422
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,8,16384,1,0,37.0933
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,512,1,0,1.2459
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,1024,1,0,2.5782
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,1536,1,0,3.9496
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,2048,1,0,5.3679
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,3072,1,0,8.6370
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,4096,1,0,12.1348
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,16,1,0,0.1980
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,6144,1,0,20.4661
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,32,1,0,0.2599
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,64,1,0,0.3854
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,128,1,0,0.6430
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,16,8192,1,0,28.7532
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,256,1,0,1.2412
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,512,1,0,2.5209
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,1024,1,0,5.0812
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,1536,1,0,7.9745
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,2048,1,0,11.0641
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,16,1,0,0.2816
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,32,1,0,0.4072
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,3072,1,0,17.5903
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,64,1,0,0.6591
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,32,4096,1,0,24.6910
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,128,1,0,1.2338
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,256,1,0,2.4492
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,512,1,0,5.0606
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,1024,1,0,10.9052
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,128,16,1,0,0.4468
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,128,32,1,0,0.6973
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,1536,1,0,16.3753
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,128,64,1,0,1.2732
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,64,2048,1,0,22.8627
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,128,128,1,0,2.4701
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,128,256,1,0,5.1788
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,256,16,1,0,0.7778
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,128,512,1,0,10.4478
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,256,32,1,0,1.3540
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,256,64,1,0,2.5979
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,256,128,1,0,5.0649
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,128,1024,1,0,21.3571
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,256,256,1,0,10.3635
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,16,1,0,0.1094
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,32,1,0,0.1073
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,64,1,0,0.1180
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,128,1,0,0.1200
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,256,1,0,0.1401
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,32,256,512,1,0,20.7012
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,512,1,0,0.1613
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,1024,1,0,0.2078
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,1536,1,0,0.2509
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,2048,1,0,0.2928
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,3072,1,0,0.4075
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,4096,1,0,0.5287
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,6144,1,0,0.7951
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,8192,1,0,1.0792
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,10240,1,0,1.4212
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,12288,1,0,1.7485
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,16384,1,0,2.6126
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,16,1,0,0.1095
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,1,32768,1,0,7.5061
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,32,1,0,0.1200
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,64,1,0,0.1245
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,128,1,0,0.1381
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,256,1,0,0.1582
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,512,1,0,0.1996
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,1024,1,0,0.2799
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,1536,1,0,0.3798
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,2048,1,0,0.4877
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,3072,1,0,0.7094
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,4096,1,0,0.9442
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,6144,1,0,1.4929
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,8192,1,0,2.1304
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,10240,1,0,2.7318
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,12288,1,0,3.5524
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,16,1,0,0.1196
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,16384,1,0,5.1462
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,32,1,0,0.1197
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,2,32768,1,0,14.6160
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,64,1,0,0.1307
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,128,1,0,0.1549
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,256,1,0,0.1975
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,512,1,0,0.2731
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,1024,1,0,0.4621
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,1536,1,0,0.6658
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,2048,1,0,0.8779
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,3072,1,0,1.3366
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,4096,1,0,1.8319
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,6144,1,0,2.9426
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,8192,1,0,4.5048
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,10240,1,0,5.7639
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,12288,1,0,7.2323
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,16,1,0,0.1197
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,16384,1,0,10.6060
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,32,1,0,0.1363
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,64,1,0,0.1546
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,128,1,0,0.1937
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,256,1,0,0.2699
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,4,32768,1,0,30.2602
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,512,1,0,0.4520
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,1024,1,0,0.8375
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,1536,1,0,1.2542
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,2048,1,0,1.7109
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,3072,1,0,2.6603
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,4096,1,0,3.7859
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,6144,1,0,6.0708
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,8192,1,0,8.4685
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,10240,1,0,11.5824
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,12288,1,0,14.3914
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,16,1,0,0.1385
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,32,1,0,0.1582
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,8,16384,1,0,20.9445
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,64,1,0,0.1977
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,128,1,0,0.2713
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,256,1,0,0.4461
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,512,1,0,0.8153
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,1024,1,0,1.6703
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,1536,1,0,2.4924
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,2048,1,0,3.5365
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,3072,1,0,5.4779
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,4096,1,0,7.4903
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,6144,1,0,12.3106
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,16,1,0,0.1639
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,32,1,0,0.2022
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,16,8192,1,0,17.7190
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,64,1,0,0.2779
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,128,1,0,0.4460
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,256,1,0,0.8095
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,512,1,0,1.6850
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,1024,1,0,3.3640
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,1536,1,0,5.1965
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,2048,1,0,7.0551
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,16,1,0,0.2121
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,3072,1,0,11.4462
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,32,1,0,0.2858
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,32,4096,1,0,15.6436
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,64,1,0,0.4552
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,128,1,0,0.8129
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,256,1,0,1.6031
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,512,1,0,3.4643
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,1024,1,0,6.7592
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,1536,1,0,10.7665
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,128,16,1,0,0.3058
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,128,32,1,0,0.4757
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,64,2048,1,0,14.3981
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,128,64,1,0,0.8289
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,128,128,1,0,1.6172
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,128,256,1,0,3.2059
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,128,512,1,0,6.7426
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,256,16,1,0,0.5128
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,256,32,1,0,0.8722
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,256,64,1,0,1.6493
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,128,1024,1,0,14.2592
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,256,128,1,0,3.3300
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,256,256,1,0,6.5176
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,32,1,0,0.1053
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,16,1,0,0.1049
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,64,1,0,0.1114
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,128,1,0,0.1175
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,16,256,512,1,0,13.7661
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,256,1,0,0.1281
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,512,1,0,0.1436
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,1024,1,0,0.1825
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,1536,1,0,0.2162
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,2048,1,0,0.2506
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,3072,1,0,0.3241
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,4096,1,0,0.3993
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,6144,1,0,0.5847
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,8192,1,0,0.7826
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,10240,1,0,1.0028
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,12288,1,0,1.2224
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,16384,1,0,1.7446
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,1,32768,1,0,4.4985
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,16,1,0,0.1054
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,32,1,0,0.1099
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,64,1,0,0.1158
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,128,1,0,0.1279
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,256,1,0,0.1383
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,512,1,0,0.1762
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,1536,1,0,0.3050
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,1024,1,0,0.2370
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,2048,1,0,0.3721
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,3072,1,0,0.5307
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,4096,1,0,0.7005
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,6144,1,0,1.0716
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,8192,1,0,1.4619
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,10240,1,0,1.8741
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,12288,1,0,2.4485
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,16384,1,0,3.3930
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,16,1,0,0.1116
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,2,32768,1,0,8.9129
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,32,1,0,0.1156
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,64,1,0,0.1220
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,128,1,0,0.1404
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,256,1,0,0.1745
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,512,1,0,0.2309
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,1024,1,0,0.3580
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,1536,1,0,0.5066
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,2048,1,0,0.6592
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,3072,1,0,0.9875
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,6144,1,0,2.0545
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,4096,1,0,1.3278
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,8192,1,0,2.9100
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,10240,1,0,3.8129
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,12288,1,0,4.7848
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,16384,1,0,6.8824
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,16,1,0,0.1141
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,32,1,0,0.1204
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,64,1,0,0.1329
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,4,32768,1,0,18.2122
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,128,1,0,0.1705
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,256,1,0,0.2264
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,512,1,0,0.3534
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,1024,1,0,0.6392
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,1536,1,0,0.9327
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,2048,1,0,1.2661
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,3072,1,0,1.8900
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,4096,1,0,2.6359
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,6144,1,0,4.1050
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,8192,1,0,5.8848
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,10240,1,0,7.7543
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,12288,1,0,9.7929
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,16,1,0,0.1258
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,32,1,0,0.1383
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,64,1,0,0.1689
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,8,16384,1,0,13.9078
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,128,1,0,0.2254
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,256,1,0,0.3505
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,512,1,0,0.6230
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,1024,1,0,1.2382
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,1536,1,0,1.8242
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,2048,1,0,2.4916
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,3072,1,0,3.8467
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,4096,1,0,5.3487
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,6144,1,0,8.6240
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,16,1,0,0.1449
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,16,8192,1,0,11.8585
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,64,1,0,0.2282
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,32,1,0,0.1754
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,128,1,0,0.3496
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,256,1,0,0.6165
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,512,1,0,1.2049
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,1024,1,0,2.4482
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,1536,1,0,3.7949
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,2048,1,0,5.0298
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,3072,1,0,8.0144
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,16,1,0,0.1793
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,32,4096,1,0,10.8711
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,32,1,0,0.2348
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,64,1,0,0.3554
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,128,1,0,0.6240
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,256,1,0,1.1941
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,512,1,0,2.3817
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,1024,1,0,4.9633
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,1536,1,0,7.7425
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,128,16,1,0,0.2425
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,64,2048,1,0,10.3876
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,128,32,1,0,0.3674
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,128,64,1,0,0.6249
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,128,128,1,0,1.2072
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,128,256,1,0,2.3661
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,128,512,1,0,4.8275
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,256,16,1,0,0.3853
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,256,32,1,0,0.6538
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,128,1024,1,0,10.1100
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,256,64,1,0,1.2134
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,256,128,1,0,2.3765
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,256,256,1,0,4.9043
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,16,1,0,0.1053
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,32,1,0,0.1073
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,64,1,0,0.1116
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,8,256,512,1,0,9.8272
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,128,1,0,0.1136
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,256,1,0,0.1241
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,512,1,0,0.1425
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,1536,1,0,0.1974
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,1024,1,0,0.1730
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,2048,1,0,0.2309
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,3072,1,0,0.2932
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,4096,1,0,0.3609
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,6144,1,0,0.4949
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,8192,1,0,0.6381
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,10240,1,0,0.7829
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,12288,1,0,0.9583
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,16384,1,0,1.3474
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,16,1,0,0.1037
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,1,32768,1,0,3.1373
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,32,1,0,0.1096
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,64,1,0,0.1139
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,128,1,0,0.1224
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,256,1,0,0.1335
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,512,1,0,0.1659
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,1024,1,0,0.2163
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,1536,1,0,0.2709
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,2048,1,0,0.3363
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,3072,1,0,0.4560
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,4096,1,0,0.5881
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,6144,1,0,0.8625
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,8192,1,0,1.1745
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,10240,1,0,1.4813
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,12288,1,0,1.7891
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,16384,1,0,2.6447
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,2,32768,1,0,6.2466
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,16,1,0,0.1115
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,32,1,0,0.1138
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,64,1,0,0.1220
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,128,1,0,0.1304
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,256,1,0,0.1640
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,512,1,0,0.2101
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,1024,1,0,0.3200
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,1536,1,0,0.4375
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,2048,1,0,0.5612
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,3072,1,0,0.8124
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,4096,1,0,1.0905
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,6144,1,0,1.6378
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,8192,1,0,2.2141
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,10240,1,0,2.8289
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,12288,1,0,3.5910
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,16384,1,0,5.1277
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,16,1,0,0.1136
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,4,32768,1,0,12.3069
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,32,1,0,0.1179
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,64,1,0,0.1337
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,128,1,0,0.1594
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,256,1,0,0.2059
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,512,1,0,0.3148
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,1024,1,0,0.5450
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,1536,1,0,0.7808
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,2048,1,0,1.0583
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,3072,1,0,1.5607
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,4096,1,0,2.0733
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,6144,1,0,3.2395
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,8192,1,0,4.4816
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,10240,1,0,5.9410
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,12288,1,0,7.4777
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,16,1,0,0.1200
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,8,16384,1,0,10.2842
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,32,1,0,0.1344
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,64,1,0,0.1599
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,128,1,0,0.2040
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,256,1,0,0.3122
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,512,1,0,0.5393
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,1024,1,0,1.0344
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,1536,1,0,1.5123
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,2048,1,0,1.9998
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,3072,1,0,3.1286
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,4096,1,0,4.1943
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,6144,1,0,6.7079
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,16,1,0,0.1280
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,16,8192,1,0,9.2073
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,32,1,0,0.1608
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,64,1,0,0.2046
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,128,1,0,0.3088
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,256,1,0,0.5301
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,512,1,0,1.0156
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,1024,1,0,2.0025
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,1536,1,0,3.0355
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,2048,1,0,4.1561
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,3072,1,0,6.4204
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,16,1,0,0.1622
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,32,4096,1,0,8.7288
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,32,1,0,0.2070
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,64,1,0,0.3141
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,128,1,0,0.5348
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,256,1,0,1.0129
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,512,1,0,1.9647
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,1024,1,0,3.9941
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,1536,1,0,6.2234
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,128,16,1,0,0.2124
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,64,2048,1,0,8.4375
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,128,32,1,0,0.3168
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,128,64,1,0,0.5378
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,128,128,1,0,1.0200
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,128,256,1,0,1.9424
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,128,512,1,0,4.0629
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,256,16,1,0,0.3284
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,128,1024,1,0,8.1894
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,256,32,1,0,0.5493
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,256,64,1,0,1.0209
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,256,128,1,0,1.9575
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,256,256,1,0,3.9629
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,16,1,0,0.0992
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,32,1,0,0.0995
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,64,1,0,0.1078
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,4,256,512,1,0,8.3282
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,128,1,0,0.1139
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,256,1,0,0.1221
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,512,1,0,0.1341
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,1024,1,0,0.1692
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,1536,1,0,0.1915
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,2048,1,0,0.2224
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,3072,1,0,0.2843
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,4096,1,0,0.3431
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,6144,1,0,0.4669
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,8192,1,0,0.5942
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,10240,1,0,0.7225
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,12288,1,0,0.8528
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,16384,1,0,1.1414
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,1,32768,1,0,2.4268
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,16,1,0,0.1057
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,32,1,0,0.1108
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,64,1,0,0.1155
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,128,1,0,0.1197
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,256,1,0,0.1360
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,512,1,0,0.1609
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,1024,1,0,0.2100
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,2048,1,0,0.3162
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,1536,1,0,0.2641
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,3072,1,0,0.4274
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,4096,1,0,0.5421
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,6144,1,0,0.7762
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,8192,1,0,1.0260
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,10240,1,0,1.2719
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,12288,1,0,1.5631
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,16384,1,0,2.1158
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,2,32768,1,0,4.7663
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,16,1,0,0.1095
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,32,1,0,0.1143
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,64,1,0,0.1198
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,128,1,0,0.1282
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,256,1,0,0.1614
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,512,1,0,0.2025
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,1024,1,0,0.3036
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,1536,1,0,0.4061
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,2048,1,0,0.5158
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,3072,1,0,0.7356
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,4096,1,0,0.9736
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,8192,1,0,1.9720
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,6144,1,0,1.4550
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,10240,1,0,2.4871
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,12288,1,0,3.0082
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,16384,1,0,4.2106
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,16,1,0,0.1136
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,4,32768,1,0,9.6075
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,32,1,0,0.1201
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,64,1,0,0.1281
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,128,1,0,0.1572
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,256,1,0,0.1989
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,512,1,0,0.2957
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,1024,1,0,0.5029
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,1536,1,0,0.7140
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,2048,1,0,0.9464
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,3072,1,0,1.4092
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,4096,1,0,1.8821
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,6144,1,0,2.8513
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,8192,1,0,3.8981
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,10240,1,0,5.0693
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,12288,1,0,6.2202
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,8,16384,1,0,8.4708
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,16,1,0,0.1197
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,32,1,0,0.1252
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,64,1,0,0.1563
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,128,1,0,0.1969
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,256,1,0,0.2925
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,512,1,0,0.4967
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,1024,1,0,0.9402
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,2048,1,0,1.8113
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,1536,1,0,1.3840
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,3072,1,0,2.7841
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,4096,1,0,3.7507
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,6144,1,0,5.8686
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,16,8192,1,0,7.9228
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,16,1,0,0.1341
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,32,1,0,0.1572
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,64,1,0,0.1954
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,128,1,0,0.2899
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,256,1,0,0.4934
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,512,1,0,0.9265
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,1024,1,0,1.7918
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,1536,1,0,2.7134
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,2048,1,0,3.6246
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,16,1,0,0.1572
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,3072,1,0,5.7791
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,32,4096,1,0,7.8677
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,32,1,0,0.1959
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,64,1,0,0.2901
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,128,1,0,0.4883
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,256,1,0,0.9323
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,512,1,0,1.7895
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,1024,1,0,3.6246
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,1536,1,0,5.6378
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,64,2048,1,0,7.7033
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,128,16,1,0,0.2001
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,128,32,1,0,0.2941
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,128,64,1,0,0.4924
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,128,128,1,0,0.9287
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,128,256,1,0,1.7720
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,128,512,1,0,3.6038
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,256,16,1,0,0.2981
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,128,1024,1,0,7.5683
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,256,32,1,0,0.4976
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,256,64,1,0,0.9392
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,256,128,1,0,1.7821
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,256,256,1,0,3.5647
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,16,1,0,0.1015
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,32,1,0,0.1015
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,2,256,512,1,0,7.4786
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,64,1,0,0.1056
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,256,1,0,0.1219
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,128,1,0,0.1084
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,512,1,0,0.1360
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,1024,1,0,0.1660
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,1536,1,0,0.1890
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,2048,1,0,0.2186
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,3072,1,0,0.2756
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,4096,1,0,0.3327
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,6144,1,0,0.4467
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,8192,1,0,0.5694
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,10240,1,0,0.6936
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,12288,1,0,0.8350
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,16384,1,0,1.1043
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,1,32768,1,0,2.1119
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,32,1,0,0.1067
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,16,1,0,0.1014
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,64,1,0,0.1077
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,128,1,0,0.1215
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,256,1,0,0.1286
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,512,1,0,0.1590
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,1024,1,0,0.2041
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,1536,1,0,0.2561
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,2048,1,0,0.3073
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,3072,1,0,0.4077
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,4096,1,0,0.5144
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,6144,1,0,0.7571
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,8192,1,0,0.9921
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,10240,1,0,1.2067
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,12288,1,0,1.4440
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,16384,1,0,1.9148
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,2,32768,1,0,4.2744
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,16,1,0,0.1063
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,32,1,0,0.1097
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,64,1,0,0.1192
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,128,1,0,0.1319
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,256,1,0,0.1550
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,512,1,0,0.1975
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,1024,1,0,0.2920
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,1536,1,0,0.3861
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,2048,1,0,0.4921
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,3072,1,0,0.7162
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,4096,1,0,0.9355
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,6144,1,0,1.3731
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,8192,1,0,1.7985
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,10240,1,0,2.2619
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,12288,1,0,2.7671
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,16,1,0,0.1103
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,16384,1,0,3.7370
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,32,1,0,0.1166
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,4,32768,1,0,8.1625
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,64,1,0,0.1237
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,256,1,0,0.1940
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,128,1,0,0.1507
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,512,1,0,0.2870
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,1024,1,0,0.4770
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,1536,1,0,0.6994
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,2048,1,0,0.9094
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,3072,1,0,1.3194
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,4096,1,0,1.7324
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,6144,1,0,2.6667
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,8192,1,0,3.5523
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,10240,1,0,4.6332
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,12288,1,0,5.5270
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,16,1,0,0.1180
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,8,16384,1,0,7.6801
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,32,1,0,0.1283
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,64,1,0,0.1505
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,128,1,0,0.1913
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,256,1,0,0.2836
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,512,1,0,0.4703
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,1024,1,0,0.8980
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,1536,1,0,1.3134
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,2048,1,0,1.7052
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,3072,1,0,2.6133
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,4096,1,0,3.4661
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,6144,1,0,5.3393
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,16,8192,1,0,7.4643
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,16,1,0,0.1252
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,32,1,0,0.1517
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,64,1,0,0.1913
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,128,1,0,0.2785
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,256,1,0,0.4652
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,512,1,0,0.8897
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,1024,1,0,1.7038
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,1536,1,0,2.5671
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,2048,1,0,3.4765
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,16,1,0,0.1527
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,3072,1,0,5.3123
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,32,4096,1,0,7.1400
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,32,1,0,0.1915
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,64,1,0,0.2828
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,128,1,0,0.4626
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,256,1,0,0.8975
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,512,1,0,1.6935
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,1024,1,0,3.4218
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,1536,1,0,5.3004
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,64,2048,1,0,7.0062
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,128,16,1,0,0.1907
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,128,32,1,0,0.2804
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,128,64,1,0,0.4681
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,128,128,1,0,0.8880
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,128,256,1,0,1.6894
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,128,512,1,0,3.3633
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,128,1024,1,0,7.0773
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,256,16,1,0,0.2872
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,256,32,1,0,0.4661
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,256,64,1,0,0.8985
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,256,128,1,0,1.6924
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,256,256,1,0,3.3888
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,float16,1,256,512,1,0,7.0176
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,512,1,0,0.3090
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1024,1,0,0.4961
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,2048,1,0,0.9219
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,3072,1,0,1.4423
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,16,1,0,0.1658
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,32,1,0,0.1707
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,64,1,0,0.1758
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,128,1,0,0.1974
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,256,1,0,0.2396
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,4096,1,0,1.9593
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,6144,1,0,3.1598
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,1536,1,0,0.6976
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,8192,1,0,4.5089
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,10240,1,0,6.0524
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,16,1,0,0.1737
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,32,1,0,0.1820
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,64,1,0,0.1999
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,128,1,0,0.2403
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,256,1,0,0.3029
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,512,1,0,0.4778
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1024,1,0,0.8631
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,1536,1,0,1.3150
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,2048,1,0,1.7455
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,3072,1,0,2.7096
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,4096,1,0,3.7486
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,12288,1,0,7.7120
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,6144,1,0,6.1236
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,16384,1,0,11.7417
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,8192,1,0,9.0429
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,10240,1,0,12.1286
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,1,32768,1,0,34.3421
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,16,1,0,0.1876
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,12288,1,0,15.2224
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,16384,1,0,23.1940
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,32,1,0,0.2029
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,64,1,0,0.2469
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,128,1,0,0.3045
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,256,1,0,0.4724
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,512,1,0,0.8365
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1024,1,0,1.6357
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,1536,1,0,2.4734
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,2048,1,0,3.3332
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,3072,1,0,5.2498
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,2,32768,1,0,68.1887
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,4096,1,0,7.5614
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,6144,1,0,12.0748
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,8192,1,0,17.9179
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,10240,1,0,24.0584
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,16,1,0,0.2161
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,32,1,0,0.2554
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,12288,1,0,30.7849
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,64,1,0,0.3117
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,128,1,0,0.4712
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,16384,1,0,46.1056
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,256,1,0,0.8224
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,512,1,0,1.5798
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1024,1,0,3.1292
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,1536,1,0,4.7886
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,2048,1,0,6.7524
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,3072,1,0,10.3565
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,4096,1,0,14.9543
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,6144,1,0,24.5815
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,8192,1,0,35.5968
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,16,1,0,0.2732
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,32,1,0,0.3296
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,10240,1,0,47.8031
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,4,32768,1,0,132.9474
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,64,1,0,0.4878
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,128,1,0,0.8225
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,256,1,0,1.5572
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,12288,1,0,59.0428
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,512,1,0,3.0177
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1024,1,0,6.3426
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,1536,1,0,9.4572
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,8,16384,1,0,88.9484
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,2048,1,0,13.3817
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,3072,1,0,21.1742
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,16,1,0,0.3694
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,4096,1,0,29.7121
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,64,1,0,0.8600
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,32,1,0,0.5246
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,128,1,0,1.5601
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,6144,1,0,46.6558
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,256,1,0,2.9865
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,512,1,0,6.1327
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1024,1,0,12.5556
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,16,8192,1,0,67.9632
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,1536,1,0,19.4007
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,2048,1,0,26.5641
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,16,1,0,0.6015
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,32,1,0,0.9320
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,64,1,0,1.6355
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,128,1,0,2.9856
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,3072,1,0,39.8801
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,256,1,0,6.0622
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,512,1,0,12.1514
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,32,4096,1,0,56.3351
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1024,1,0,24.9566
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,16,1,0,1.0854
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,32,1,0,1.7864
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,64,1,0,3.1422
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,1536,1,0,36.3353
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,128,1,0,6.0713
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,256,1,0,11.9895
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,64,2048,1,0,49.9495
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,16,1,0,2.0924
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,512,1,0,24.1135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,32,1,0,3.4398
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,64,1,0,6.3809
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,128,1,0,12.0331
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,128,1024,1,0,46.7588
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,256,1,0,23.8355
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,128,256,512,1,0,45.0887
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,512,1,0,0.2362
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1536,1,0,0.4718
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,1024,1,0,0.3500
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,2048,1,0,0.6012
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,3072,1,0,0.8909
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,4096,1,0,1.1933
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,6144,1,0,1.9148
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,16,1,0,0.1503
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,32,1,0,0.1546
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,64,1,0,0.1561
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,8192,1,0,2.6967
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,128,1,0,0.1662
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,10240,1,0,3.5539
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,12288,1,0,4.4886
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,256,1,0,0.1943
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,16384,1,0,6.6035
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,16,1,0,0.1535
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,1,32768,1,0,18.7641
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,32,1,0,0.1530
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,64,1,0,0.1664
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,128,1,0,0.1914
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,256,1,0,0.2344
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,512,1,0,0.3392
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1024,1,0,0.5669
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,1536,1,0,0.8215
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,2048,1,0,1.0797
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,3072,1,0,1.6740
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,4096,1,0,2.2936
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,6144,1,0,3.6649
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,8192,1,0,5.2196
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,10240,1,0,6.9233
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,12288,1,0,8.7675
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,16,1,0,0.1591
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,16384,1,0,13.1398
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,32,1,0,0.1688
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,64,1,0,0.1947
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,128,1,0,0.2362
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,256,1,0,0.3296
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,512,1,0,0.5480
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1024,1,0,1.0199
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,1536,1,0,1.5499
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,2,32768,1,0,37.1476
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,2048,1,0,2.0785
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,3072,1,0,3.2185
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,4096,1,0,4.4616
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,6144,1,0,7.1715
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,8192,1,0,10.4453
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,16,1,0,0.1769
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,10240,1,0,13.8623
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,12288,1,0,17.3200
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,16384,1,0,25.9786
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,64,1,0,0.2415
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,32,1,0,0.1995
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,128,1,0,0.3331
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,256,1,0,0.5420
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,512,1,0,0.9901
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1024,1,0,1.9723
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,1536,1,0,2.9835
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,2048,1,0,4.0461
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,3072,1,0,6.2963
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,4,32768,1,0,73.6801
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,4096,1,0,8.9680
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,6144,1,0,14.1651
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,8192,1,0,20.6615
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,10240,1,0,27.5239
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,16,1,0,0.2079
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,32,1,0,0.2490
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,64,1,0,0.3409
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,128,1,0,0.5397
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,256,1,0,0.9749
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,12288,1,0,34.9180
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,512,1,0,1.9167
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1024,1,0,3.8310
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,1536,1,0,5.8479
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,8,16384,1,0,51.5975
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,2048,1,0,8.1518
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,3072,1,0,12.4472
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,16,1,0,0.2691
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,4096,1,0,17.7235
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,32,1,0,0.3595
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,64,1,0,0.5569
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,6144,1,0,28.7148
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,128,1,0,0.9761
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,256,1,0,1.8923
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,512,1,0,3.7308
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,16,8192,1,0,41.1372
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1024,1,0,7.7369
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,1536,1,0,11.5475
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,2048,1,0,16.1401
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,16,1,0,0.3973
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,32,1,0,0.5974
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,64,1,0,1.0119
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,3072,1,0,25.3039
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,128,1,0,1.8974
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,256,1,0,3.6883
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,32,4096,1,0,35.2194
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,512,1,0,7.5297
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1024,1,0,15.3607
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,16,1,0,0.6749
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,32,1,0,1.0892
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,1536,1,0,23.5396
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,64,1,0,1.9677
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,128,1,0,3.6900
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,64,2048,1,0,32.0749
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,256,1,0,7.4623
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,512,1,0,14.9304
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,16,1,0,1.2419
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,32,1,0,2.1170
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,64,1,0,3.8387
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,128,1024,1,0,30.4787
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,128,1,0,7.4658
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,256,1,0,14.7739
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,64,256,512,1,0,29.6569
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1024,1,0,0.2606
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,512,1,0,0.1974
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,1536,1,0,0.3359
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,2048,1,0,0.4197
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,16,1,0,0.1307
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,32,1,0,0.1301
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,4096,1,0,0.7568
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,64,1,0,0.1331
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,128,1,0,0.1479
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,256,1,0,0.1670
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,3072,1,0,0.5823
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,6144,1,0,1.1589
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,16,1,0,0.1384
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,32,1,0,0.1361
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,8192,1,0,1.6098
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,10240,1,0,2.1112
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,12288,1,0,2.6594
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,16384,1,0,3.8444
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,64,1,0,0.1498
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,128,1,0,0.1630
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,256,1,0,0.1928
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,1,32768,1,0,10.3374
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,512,1,0,0.2531
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1024,1,0,0.3946
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,1536,1,0,0.5385
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,2048,1,0,0.6934
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,3072,1,0,1.0246
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,4096,1,0,1.3875
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,8192,1,0,3.1193
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,6144,1,0,2.2189
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,10240,1,0,4.0869
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,12288,1,0,5.1189
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,16384,1,0,7.4487
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,16,1,0,0.1345
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,64,1,0,0.1632
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,32,1,0,0.1405
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,2,32768,1,0,20.4438
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,128,1,0,0.1906
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,256,1,0,0.2487
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,512,1,0,0.3822
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1024,1,0,0.6547
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,1536,1,0,0.9567
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,2048,1,0,1.2745
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,3072,1,0,1.9796
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,4096,1,0,2.7183
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,6144,1,0,4.2946
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,8192,1,0,6.0546
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,10240,1,0,7.9779
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,12288,1,0,10.0225
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,16,1,0,0.1452
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,16384,1,0,14.7893
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,32,1,0,0.1615
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,64,1,0,0.1914
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,128,1,0,0.2505
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,256,1,0,0.3781
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,512,1,0,0.6339
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1024,1,0,1.2141
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,4,32768,1,0,40.4124
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,1536,1,0,1.8526
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,2048,1,0,2.4994
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,3072,1,0,3.8467
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,4096,1,0,5.3072
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,6144,1,0,8.4284
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,8192,1,0,12.1114
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,16,1,0,0.1667
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,10240,1,0,15.9634
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,12288,1,0,19.7786
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,32,1,0,0.1983
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,64,1,0,0.2573
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,8,16384,1,0,29.2484
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,128,1,0,0.3774
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,256,1,0,0.6284
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,512,1,0,1.1828
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1024,1,0,2.3945
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,1536,1,0,3.6147
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,2048,1,0,4.8902
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,3072,1,0,7.5566
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,4096,1,0,10.6133
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,16,1,0,0.2078
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,6144,1,0,16.6620
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,32,1,0,0.2651
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,16,8192,1,0,23.9778
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,64,1,0,0.3866
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,128,1,0,0.6277
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,256,1,0,1.1685
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,512,1,0,2.3342
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1024,1,0,4.6846
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,1536,1,0,7.1081
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,2048,1,0,9.8201
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,3072,1,0,14.9248
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,16,1,0,0.2859
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,32,1,0,0.4051
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,64,1,0,0.6450
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,32,4096,1,0,21.0756
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,128,1,0,1.1680
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,256,1,0,2.3151
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,512,1,0,4.5677
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1024,1,0,9.4116
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,32,1,0,0.6817
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,1536,1,0,14.0253
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,16,1,0,0.4447
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,64,1,0,1.2077
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,64,2048,1,0,19.4284
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,128,1,0,2.3188
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,256,1,0,4.5368
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,512,1,0,9.1973
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,16,1,0,0.7577
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,32,1,0,1.2828
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,64,1,0,2.3895
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,128,1024,1,0,18.6364
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,128,1,0,4.5363
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,256,1,0,9.1403
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,32,256,512,1,0,18.2246
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1024,1,0,0.2187
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,512,1,0,0.1671
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,16,1,0,0.1240
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,32,1,0,0.1278
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,64,1,0,0.1290
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,128,1,0,0.1340
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,256,1,0,0.1443
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,1536,1,0,0.2691
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,4096,1,0,0.5472
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,2048,1,0,0.3173
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,3072,1,0,0.4290
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,16,1,0,0.1281
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,8192,1,0,1.0737
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,32,1,0,0.1273
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,10240,1,0,1.3778
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,12288,1,0,1.7104
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,64,1,0,0.1341
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,16384,1,0,2.4427
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,128,1,0,0.1406
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,256,1,0,0.1638
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,32768,1,0,6.2133
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,512,1,0,0.2120
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1024,1,0,0.3039
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,1536,1,0,0.4039
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,2048,1,0,0.5049
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,3072,1,0,0.7175
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,4096,1,0,0.9450
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,1,6144,1,0,0.7972
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,6144,1,0,1.4667
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,8192,1,0,2.0344
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,10240,1,0,2.6441
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,12288,1,0,3.2857
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,16384,1,0,4.6931
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,16,1,0,0.1289
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,32,1,0,0.1319
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,64,1,0,0.1406
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,2,32768,1,0,11.9911
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,128,1,0,0.1585
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,256,1,0,0.2105
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,512,1,0,0.2988
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1024,1,0,0.4819
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,1536,1,0,0.6800
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,2048,1,0,0.8795
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,3072,1,0,1.3329
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,4096,1,0,1.8157
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,6144,1,0,2.8461
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,8192,1,0,3.9602
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,10240,1,0,5.1361
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,12288,1,0,6.3700
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,16384,1,0,9.1075
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,16,1,0,0.1323
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,32,1,0,0.1401
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,64,1,0,0.1652
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,4,32768,1,0,23.7667
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,128,1,0,0.2085
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,256,1,0,0.2955
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,512,1,0,0.4717
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1024,1,0,0.8458
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,1536,1,0,1.2662
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,2048,1,0,1.7011
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,3072,1,0,2.6113
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,4096,1,0,3.5610
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,6144,1,0,5.5587
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,8192,1,0,7.7104
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,16,1,0,0.1443
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,10240,1,0,10.0360
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,12288,1,0,12.4854
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,32,1,0,0.1609
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,64,1,0,0.2114
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,8,16384,1,0,18.0803
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,128,1,0,0.2950
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,256,1,0,0.4632
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,512,1,0,0.8274
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1024,1,0,1.6420
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,1536,1,0,2.4901
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,2048,1,0,3.3465
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,4096,1,0,6.9574
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,3072,1,0,5.1090
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,16,1,0,0.1695
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,6144,1,0,10.9065
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,32,1,0,0.2159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,16,8192,1,0,15.3973
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,64,1,0,0.3010
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,128,1,0,0.4651
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,256,1,0,0.8191
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,512,1,0,1.6088
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1024,1,0,3.2362
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,1536,1,0,4.8753
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,2048,1,0,6.5562
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,3072,1,0,10.0256
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,16,1,0,0.2250
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,32,4096,1,0,13.9294
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,32,1,0,0.3091
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,64,1,0,0.4741
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,128,1,0,0.8179
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,256,1,0,1.6001
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,512,1,0,3.1789
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1024,1,0,6.3444
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,16,1,0,0.3295
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,1536,1,0,9.5963
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,64,2048,1,0,13.1384
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,32,1,0,0.4928
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,64,1,0,0.8359
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,128,1,0,1.5970
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,256,1,0,3.1637
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,512,1,0,6.2315
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,16,1,0,0.5305
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,32,1,0,0.8730
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,64,1,0,1.6360
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,128,1024,1,0,12.7184
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,128,1,0,3.1635
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,256,1,0,6.2005
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,16,256,512,1,0,12.4859
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,512,1,0,0.1577
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,32,1,0,0.1260
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,128,1,0,0.1319
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,64,1,0,0.1267
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1024,1,0,0.2016
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,16,1,0,0.1219
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,256,1,0,0.1401
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,1536,1,0,0.2398
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,3072,1,0,0.3573
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,2048,1,0,0.2790
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,6144,1,0,0.6204
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,8192,1,0,0.8186
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,10240,1,0,1.0292
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,16,1,0,0.1260
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,32,1,0,0.1262
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,64,1,0,0.1304
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,128,1,0,0.1363
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,256,1,0,0.1566
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,512,1,0,0.1942
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1024,1,0,0.2659
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,1536,1,0,0.3369
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,4096,1,0,0.4341
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,12288,1,0,1.2608
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,2048,1,0,0.4083
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,3072,1,0,0.5652
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,16384,1,0,1.7444
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,4096,1,0,0.7426
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,6144,1,0,1.1082
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,8192,1,0,1.5023
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,1,32768,1,0,4.1344
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,10240,1,0,1.9178
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,12288,1,0,2.3542
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,16,1,0,0.1259
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,16384,1,0,3.3022
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,64,1,0,0.1363
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,2,32768,1,0,7.9116
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,32,1,0,0.1320
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,128,1,0,0.1538
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,256,1,0,0.1927
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,512,1,0,0.2585
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1024,1,0,0.3968
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,1536,1,0,0.5409
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,2048,1,0,0.7007
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,3072,1,0,1.0295
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,4096,1,0,1.3775
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,6144,1,0,2.1075
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,8192,1,0,2.8983
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,12288,1,0,4.5627
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,10240,1,0,3.7147
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,16,1,0,0.1321
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,16384,1,0,6.3914
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,32,1,0,0.1381
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,64,1,0,0.1517
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,4,32768,1,0,15.3709
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,128,1,0,0.1864
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,256,1,0,0.2576
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,512,1,0,0.3891
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1024,1,0,0.6727
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,1536,1,0,0.9884
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,2048,1,0,1.3095
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,3072,1,0,1.9763
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,4096,1,0,2.6806
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,6144,1,0,4.1307
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,8192,1,0,5.6524
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,10240,1,0,7.2318
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,12288,1,0,8.8984
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,16,1,0,0.1367
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,32,1,0,0.1504
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,8,16384,1,0,12.4771
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,64,1,0,0.1906
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,128,1,0,0.2541
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,512,1,0,0.6610
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,256,1,0,0.3858
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1024,1,0,1.2766
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,1536,1,0,1.9070
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,2048,1,0,2.5659
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,3072,1,0,3.8985
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,4096,1,0,5.2572
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,16,1,0,0.1534
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,6144,1,0,8.0818
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,32,1,0,0.1936
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,64,1,0,0.2579
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,16,8192,1,0,11.0739
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,128,1,0,0.3875
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,256,1,0,0.6582
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,512,1,0,1.2558
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1024,1,0,2.5084
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,1536,1,0,3.7749
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,2048,1,0,5.0499
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,3072,1,0,7.6372
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,32,4096,1,0,10.3191
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,16,1,0,0.1972
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,32,1,0,0.2626
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,64,1,0,0.3911
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,128,1,0,0.6610
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,256,1,0,1.2502
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,512,1,0,2.4767
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1024,1,0,4.9397
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,1536,1,0,7.3925
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,16,1,0,0.2716
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,64,2048,1,0,9.9118
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,32,1,0,0.4007
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,64,1,0,0.6642
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,128,1,0,1.2501
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,256,1,0,2.4642
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,512,1,0,4.8826
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,128,1024,1,0,9.7028
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,32,1,0,0.6871
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,16,1,0,0.4210
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,64,1,0,1.2677
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,128,1,0,2.4675
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,256,1,0,4.8647
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,8,256,512,1,0,9.6064
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,512,1,0,0.1545
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1024,1,0,0.1956
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,16,1,0,0.1179
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,64,1,0,0.1280
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,32,1,0,0.1280
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,128,1,0,0.1299
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,256,1,0,0.1381
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,1536,1,0,0.2257
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,2048,1,0,0.2630
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,3072,1,0,0.3330
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,4096,1,0,0.3990
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,6144,1,0,0.5359
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,8192,1,0,0.6828
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,10240,1,0,0.8430
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,12288,1,0,1.0283
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,16,1,0,0.1270
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,16384,1,0,1.4098
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,32,1,0,0.1270
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,64,1,0,0.1302
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,128,1,0,0.1361
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,256,1,0,0.1502
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,512,1,0,0.1884
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1024,1,0,0.2504
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,1536,1,0,0.3125
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,1,32768,1,0,3.0988
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,2048,1,0,0.3733
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,3072,1,0,0.4975
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,4096,1,0,0.6304
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,6144,1,0,0.9333
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,8192,1,0,1.2571
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,10240,1,0,1.5883
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,12288,1,0,1.9155
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,16384,1,0,2.6178
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,16,1,0,0.1299
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,2,32768,1,0,5.8544
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,32,1,0,0.1299
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,64,1,0,0.1342
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,128,1,0,0.1487
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,256,1,0,0.1856
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,512,1,0,0.2425
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1024,1,0,0.3604
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,1536,1,0,0.4788
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,2048,1,0,0.6050
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,3072,1,0,0.8849
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,4096,1,0,1.1780
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,6144,1,0,1.7670
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,8192,1,0,2.3756
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,10240,1,0,3.0159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,12288,1,0,3.6654
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,16384,1,0,5.0348
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,16,1,0,0.1302
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,4,32768,1,0,11.3316
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,32,1,0,0.1360
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,128,1,0,0.1813
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,64,1,0,0.1521
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,256,1,0,0.2407
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,512,1,0,0.3543
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1024,1,0,0.5904
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,1536,1,0,0.8626
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,2048,1,0,1.1363
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,3072,1,0,1.6844
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,4096,1,0,2.2505
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,6144,1,0,3.4226
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,8192,1,0,4.6249
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,10240,1,0,5.8693
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,12288,1,0,7.1309
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,8,16384,1,0,9.8159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,16,1,0,0.1347
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,32,1,0,0.1477
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,64,1,0,0.1831
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,128,1,0,0.2371
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,256,1,0,0.3509
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,512,1,0,0.5845
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1024,1,0,1.1135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,1536,1,0,1.6450
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,2048,1,0,2.1834
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,3072,1,0,3.2945
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,4096,1,0,4.4103
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,16,1,0,0.1485
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,6144,1,0,6.7003
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,16,8192,1,0,9.0694
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,32,1,0,0.1814
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,64,1,0,0.2369
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,128,1,0,0.3493
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,256,1,0,0.5810
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,512,1,0,1.1032
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1024,1,0,2.1506
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,1536,1,0,3.2216
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,2048,1,0,4.3000
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,3072,1,0,6.4707
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,32,4096,1,0,8.6808
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,16,1,0,0.1855
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,32,1,0,0.2383
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,64,1,0,0.3527
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,128,1,0,0.5840
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,256,1,0,1.0957
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,512,1,0,2.1305
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1024,1,0,4.2437
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,1536,1,0,6.3404
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,64,2048,1,0,8.4710
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,16,1,0,0.2445
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,32,1,0,0.3574
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,64,1,0,0.5894
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,128,1,0,1.0977
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,256,1,0,2.1255
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,512,1,0,4.2065
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,16,1,0,0.3660
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,128,1024,1,0,8.3554
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,32,1,0,0.5977
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,64,1,0,1.1057
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,128,1,0,2.1235
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,256,1,0,4.2001
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,4,256,512,1,0,8.3103
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,512,1,0,0.1508
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1536,1,0,0.2214
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,16,1,0,0.1218
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,32,1,0,0.1224
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,1024,1,0,0.1894
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,64,1,0,0.1272
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,128,1,0,0.1312
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,256,1,0,0.1385
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,2048,1,0,0.2558
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,3072,1,0,0.3188
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,4096,1,0,0.3850
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,6144,1,0,0.5115
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,8192,1,0,0.6482
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,12288,1,0,0.9345
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,10240,1,0,0.7914
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,16384,1,0,1.2184
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,16,1,0,0.1196
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,32,1,0,0.1258
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,64,1,0,0.1280
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,128,1,0,0.1342
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,256,1,0,0.1507
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,512,1,0,0.1833
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1024,1,0,0.2435
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,1536,1,0,0.2970
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,1,32768,1,0,2.5882
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,2048,1,0,0.3582
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,3072,1,0,0.4713
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,4096,1,0,0.5952
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,10240,1,0,1.3937
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,6144,1,0,0.8571
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,12288,1,0,1.6760
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,8192,1,0,1.1154
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,16384,1,0,2.2906
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,2,32768,1,0,4.8382
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,16,1,0,0.1237
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,32,1,0,0.1298
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,128,1,0,0.1462
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,64,1,0,0.1333
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,256,1,0,0.1834
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,512,1,0,0.2367
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1024,1,0,0.3460
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,1536,1,0,0.4572
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,2048,1,0,0.5697
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,3072,1,0,0.8180
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,4096,1,0,1.0625
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,6144,1,0,1.5895
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,8192,1,0,2.1341
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,10240,1,0,2.6840
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,12288,1,0,3.2276
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,16384,1,0,4.3574
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,4,32768,1,0,9.3038
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,16,1,0,0.1281
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,32,1,0,0.1346
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,64,1,0,0.1466
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,128,1,0,0.1798
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,256,1,0,0.2336
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,512,1,0,0.3388
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1024,1,0,0.5595
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,1536,1,0,0.7985
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,2048,1,0,1.0369
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,3072,1,0,1.5407
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,4096,1,0,2.0554
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,6144,1,0,3.0772
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,8192,1,0,4.1121
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,10240,1,0,5.1837
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,16,1,0,0.1341
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,12288,1,0,6.2532
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,32,1,0,0.1464
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,8,16384,1,0,8.4589
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,64,1,0,0.1784
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,128,1,0,0.2293
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,256,1,0,0.3350
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,512,1,0,0.5485
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1024,1,0,1.0253
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,1536,1,0,1.5104
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,2048,1,0,2.0127
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,3072,1,0,2.9920
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,4096,1,0,3.9915
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,6144,1,0,6.0025
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,16,1,0,0.1443
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,16,8192,1,0,8.0609
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,32,1,0,0.1748
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,64,1,0,0.2304
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,128,1,0,0.3328
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,256,1,0,0.5516
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,512,1,0,1.0190
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1024,1,0,1.9895
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,1536,1,0,2.9548
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,2048,1,0,3.9244
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,3072,1,0,5.8704
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,16,1,0,0.1785
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,32,4096,1,0,7.8409
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,32,1,0,0.2304
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,64,1,0,0.3326
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,128,1,0,0.5428
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,256,1,0,1.0164
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,512,1,0,1.9801
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1024,1,0,3.8934
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,1536,1,0,5.8037
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,64,2048,1,0,7.7283
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,16,1,0,0.2327
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,32,1,0,0.3352
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,64,1,0,0.5497
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,128,1,0,1.0181
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,256,1,0,1.9726
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,512,1,0,3.8708
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,16,1,0,0.3401
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,128,1024,1,0,7.6675
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,32,1,0,0.5532
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,64,1,0,1.0218
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,128,1,0,1.9724
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,256,1,0,3.8660
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,2,256,512,1,0,7.6404
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1024,1,0,0.1807
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,512,1,0,0.1482
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,1536,1,0,0.2197
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,16,1,0,0.1177
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,64,1,0,0.1218
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,128,1,0,0.1241
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,32,1,0,0.1197
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,256,1,0,0.1323
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,2048,1,0,0.2523
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,3072,1,0,0.3149
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,4096,1,0,0.3766
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,16,1,0,0.1220
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,6144,1,0,0.5014
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,32,1,0,0.1217
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,64,1,0,0.1282
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,8192,1,0,0.6331
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,10240,1,0,0.7670
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,12288,1,0,0.9057
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,16384,1,0,1.1743
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,128,1,0,0.1340
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,1,32768,1,0,2.2903
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,256,1,0,0.1492
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,512,1,0,0.1813
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1024,1,0,0.2392
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,1536,1,0,0.2942
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,3072,1,0,0.4590
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,2048,1,0,0.3490
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,4096,1,0,0.5774
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,6144,1,0,0.8283
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,8192,1,0,1.0752
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,10240,1,0,1.3268
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,12288,1,0,1.5733
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,16384,1,0,2.0829
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,2,32768,1,0,4.3020
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,16,1,0,0.1202
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,32,1,0,0.1281
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,64,1,0,0.1309
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,128,1,0,0.1431
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,256,1,0,0.1741
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,512,1,0,0.2329
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1024,1,0,0.3369
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,1536,1,0,0.4389
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,2048,1,0,0.5553
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,3072,1,0,0.7901
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,4096,1,0,1.0222
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,6144,1,0,1.4966
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,8192,1,0,1.9820
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,10240,1,0,2.4746
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,12288,1,0,2.9739
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,16384,1,0,3.9938
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,16,1,0,0.1261
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,32,1,0,0.1316
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,4,32768,1,0,8.2267
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,64,1,0,0.1422
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,128,1,0,0.1736
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,256,1,0,0.2304
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,512,1,0,0.3289
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1024,1,0,0.5376
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,2048,1,0,0.9960
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,3072,1,0,1.4560
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,1536,1,0,0.7690
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,4096,1,0,1.9303
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,6144,1,0,2.8850
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,8192,1,0,3.8385
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,10240,1,0,4.8110
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,12288,1,0,5.7679
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,16,1,0,0.1301
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,32,1,0,0.1422
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,8,16384,1,0,7.7476
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,64,1,0,0.1731
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,128,1,0,0.2252
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,256,1,0,0.3277
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,512,1,0,0.5311
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1024,1,0,0.9833
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,1536,1,0,1.4393
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,2048,1,0,1.9044
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,3072,1,0,2.8406
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,4096,1,0,3.7642
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,6144,1,0,5.6149
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,32,1,0,0.1751
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,16,1,0,0.1420
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,16,8192,1,0,7.5079
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,64,1,0,0.2264
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,128,1,0,0.3252
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,256,1,0,0.5310
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,512,1,0,0.9764
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1024,1,0,1.8922
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,1536,1,0,2.8078
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,2048,1,0,3.7212
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,3072,1,0,5.5433
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,16,1,0,0.1773
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,32,4096,1,0,7.3786
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,32,1,0,0.2246
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,64,1,0,0.3243
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,128,1,0,0.5304
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,256,1,0,0.9719
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,512,1,0,1.8869
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1024,1,0,3.7011
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,1536,1,0,5.4984
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,64,2048,1,0,7.3104
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,16,1,0,0.2262
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,32,1,0,0.3247
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,64,1,0,0.5251
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,128,1,0,0.9724
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,256,1,0,1.8795
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,512,1,0,3.6882
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,16,1,0,0.3285
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,128,1024,1,0,7.2749
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,32,1,0,0.5318
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,64,1,0,0.9735
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,128,1,0,1.8840
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,256,1,0,3.6807
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,16,1,0,0.1667
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,fp8_block,1,256,512,1,0,7.2661
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,64,1,0,0.1810
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,32,1,0,0.1718
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,128,1,0,0.1986
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,256,1,0,0.2405
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,512,1,0,0.3090
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,1024,1,0,0.4976
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,2048,1,0,0.9242
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,1536,1,0,0.6973
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,3072,1,0,1.4433
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,4096,1,0,1.9641
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,6144,1,0,3.1610
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,8192,1,0,4.5171
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,10240,1,0,6.0593
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,12288,1,0,7.7272
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,16,1,0,0.1737
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,32,1,0,0.1813
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,16384,1,0,11.7542
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,64,1,0,0.2012
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,128,1,0,0.2407
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,256,1,0,0.3057
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,512,1,0,0.4793
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,1,32768,1,0,34.3343
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,1024,1,0,0.8634
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,1536,1,0,1.3218
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,2048,1,0,1.7487
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,3072,1,0,2.7108
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,4096,1,0,3.7589
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,6144,1,0,6.1296
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,8192,1,0,9.0547
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,10240,1,0,12.1382
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,16,1,0,0.1882
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,12288,1,0,15.2285
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,16384,1,0,23.2332
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,32,1,0,0.2053
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,64,1,0,0.2473
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,128,1,0,0.3058
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,256,1,0,0.4696
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,512,1,0,0.8348
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,1024,1,0,1.6382
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,1536,1,0,2.4778
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,2048,1,0,3.3450
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,2,32768,1,0,68.2678
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,3072,1,0,5.2541
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,4096,1,0,7.5620
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,6144,1,0,12.0823
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,8192,1,0,17.9377
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,10240,1,0,24.0663
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,16,1,0,0.2158
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,32,1,0,0.2540
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,12288,1,0,30.8360
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,64,1,0,0.3118
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,128,1,0,0.4707
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,256,1,0,0.8246
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,512,1,0,1.5798
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,16384,1,0,46.1259
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,1024,1,0,3.1344
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,1536,1,0,4.8008
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,2048,1,0,6.7652
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,3072,1,0,10.3673
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,4096,1,0,14.9938
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,6144,1,0,24.6113
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,8192,1,0,35.6116
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,16,1,0,0.2741
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,10240,1,0,47.8325
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,32,1,0,0.3291
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,64,1,0,0.4876
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,4,32768,1,0,133.0575
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,128,1,0,0.8236
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,12288,1,0,59.1425
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,256,1,0,1.5625
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,512,1,0,3.0247
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,1024,1,0,6.3543
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,1536,1,0,9.4676
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,8,16384,1,0,89.0770
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,2048,1,0,13.3779
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,3072,1,0,21.1987
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,4096,1,0,29.7871
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,16,1,0,0.3687
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,32,1,0,0.5261
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,64,1,0,0.8596
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,6144,1,0,46.7713
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,128,1,0,1.5618
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,256,1,0,2.9914
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,512,1,0,6.1436
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,1024,1,0,12.5773
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,16,8192,1,0,68.0319
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,1536,1,0,19.4301
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,16,1,0,0.6035
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,2048,1,0,26.5886
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,32,1,0,0.9338
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,64,1,0,1.6373
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,128,1,0,2.9911
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,3072,1,0,39.9210
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,256,1,0,6.0703
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,32,4096,1,0,56.3841
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,512,1,0,12.1685
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,128,16,1,0,1.0865
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,128,32,1,0,1.7871
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,1024,1,0,24.9924
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,1536,1,0,36.4086
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,128,64,1,0,3.1498
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,128,128,1,0,6.0829
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,128,256,1,0,12.0261
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,64,2048,1,0,50.0451
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,256,16,1,0,2.0924
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,128,512,1,0,24.1465
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,256,32,1,0,3.4492
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,256,64,1,0,6.3915
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,256,128,1,0,12.0331
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,128,1024,1,0,46.9081
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,32,1,0,0.1518
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,16,1,0,0.1490
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,64,1,0,0.1579
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,256,256,1,0,23.8816
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,128,1,0,0.1689
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,256,1,0,0.1932
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,512,1,0,0.2365
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,1024,1,0,0.3507
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,1536,1,0,0.4730
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,2048,1,0,0.6052
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,128,256,512,1,0,45.2067
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,3072,1,0,0.8926
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,4096,1,0,1.1974
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,6144,1,0,1.9174
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,8192,1,0,2.7033
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,10240,1,0,3.5635
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,12288,1,0,4.4968
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,16384,1,0,6.6108
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,16,1,0,0.1551
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,1,32768,1,0,18.7892
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,32,1,0,0.1549
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,64,1,0,0.1667
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,128,1,0,0.1917
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,256,1,0,0.2340
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,512,1,0,0.3402
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,1024,1,0,0.5696
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,1536,1,0,0.8220
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,2048,1,0,1.0828
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,3072,1,0,1.6811
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,4096,1,0,2.3013
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,6144,1,0,3.6774
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,8192,1,0,5.2241
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,10240,1,0,6.9288
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,12288,1,0,8.7806
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,16,1,0,0.1560
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,16384,1,0,13.1477
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,32,1,0,0.1700
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,64,1,0,0.1936
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,128,1,0,0.2353
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,256,1,0,0.3323
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,512,1,0,0.5498
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,1024,1,0,1.0201
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,2,32768,1,0,37.1773
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,1536,1,0,1.5532
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,2048,1,0,2.0844
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,3072,1,0,3.2228
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,4096,1,0,4.4671
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,6144,1,0,7.1882
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,8192,1,0,10.4767
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,10240,1,0,13.8882
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,12288,1,0,17.3492
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,16,1,0,0.1758
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,32,1,0,0.1988
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,16384,1,0,26.0186
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,64,1,0,0.2401
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,128,1,0,0.3339
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,256,1,0,0.5410
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,512,1,0,0.9941
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,1024,1,0,1.9771
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,1536,1,0,2.9914
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,2048,1,0,4.0543
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,3072,1,0,6.3194
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,4,32768,1,0,73.7970
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,4096,1,0,8.9712
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,6144,1,0,14.1950
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,8192,1,0,20.7088
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,10240,1,0,27.5748
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,16,1,0,0.2087
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,32,1,0,0.2486
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,64,1,0,0.3414
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,12288,1,0,34.9775
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,128,1,0,0.5415
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,256,1,0,0.9796
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,512,1,0,1.9197
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,1024,1,0,3.8401
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,1536,1,0,5.8544
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,8,16384,1,0,51.7048
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,2048,1,0,8.1686
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,3072,1,0,12.4777
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,16,1,0,0.2685
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,32,1,0,0.3597
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,4096,1,0,17.7710
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,64,1,0,0.5570
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,6144,1,0,28.7881
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,128,1,0,0.9805
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,256,1,0,1.8952
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,512,1,0,3.7366
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,16,8192,1,0,41.2171
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,1024,1,0,7.7625
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,1536,1,0,11.5772
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,2048,1,0,16.1782
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,16,1,0,0.3991
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,32,1,0,0.5954
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,3072,1,0,25.3743
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,64,1,0,1.0154
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,128,1,0,1.9013
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,32,4096,1,0,35.3449
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,256,1,0,3.6898
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,512,1,0,7.5446
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,128,16,1,0,0.6740
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,1024,1,0,15.3577
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,128,32,1,0,1.0920
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,128,64,1,0,1.9705
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,1536,1,0,23.6075
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,128,128,1,0,3.7010
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,64,2048,1,0,32.1818
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,128,256,1,0,7.4860
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,256,16,1,0,1.2453
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,256,32,1,0,2.1233
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,128,512,1,0,14.9473
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,256,64,1,0,3.8508
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,256,128,1,0,7.4909
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,128,1024,1,0,30.5294
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,16,1,0,0.1281
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,32,1,0,0.1299
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,256,256,1,0,14.8267
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,64,1,0,0.1343
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,128,1,0,0.1459
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,256,1,0,0.1654
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,512,1,0,0.1962
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,64,256,512,1,0,29.7269
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,1024,1,0,0.2589
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,1536,1,0,0.3367
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,2048,1,0,0.4190
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,3072,1,0,0.5830
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,4096,1,0,0.7655
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,6144,1,0,1.1612
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,8192,1,0,1.6108
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,10240,1,0,2.1151
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,12288,1,0,2.6657
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,16384,1,0,3.8590
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,1,32768,1,0,10.3614
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,16,1,0,0.1354
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,32,1,0,0.1361
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,64,1,0,0.1423
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,128,1,0,0.1629
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,256,1,0,0.1919
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,512,1,0,0.2552
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,1024,1,0,0.3966
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,1536,1,0,0.5421
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,2048,1,0,0.6930
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,3072,1,0,1.0254
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,4096,1,0,1.3954
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,6144,1,0,2.2250
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,8192,1,0,3.1275
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,10240,1,0,4.0996
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,12288,1,0,5.1314
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,16384,1,0,7.4678
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,16,1,0,0.1341
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,32,1,0,0.1401
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,64,1,0,0.1617
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,128,1,0,0.1916
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,256,1,0,0.2510
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,2,32768,1,0,20.4797
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,512,1,0,0.3876
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,1024,1,0,0.6560
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,1536,1,0,0.9566
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,2048,1,0,1.2773
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,3072,1,0,1.9828
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,4096,1,0,2.7297
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,6144,1,0,4.3085
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,8192,1,0,6.0825
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,10240,1,0,7.9984
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,16,1,0,0.1462
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,12288,1,0,10.0503
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,32,1,0,0.1669
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,16384,1,0,14.8316
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,64,1,0,0.1944
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,128,1,0,0.2520
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,256,1,0,0.3774
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,512,1,0,0.6380
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,1024,1,0,1.2186
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,4,32768,1,0,40.5431
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,1536,1,0,1.8611
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,2048,1,0,2.5114
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,3072,1,0,3.8637
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,4096,1,0,5.3244
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,6144,1,0,8.4694
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,8192,1,0,12.1463
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,16,1,0,0.1708
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,10240,1,0,15.9896
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,32,1,0,0.1996
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,12288,1,0,19.8714
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,64,1,0,0.2569
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,128,1,0,0.3799
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,8,16384,1,0,29.3245
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,256,1,0,0.6308
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,512,1,0,1.1886
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,1024,1,0,2.4029
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,1536,1,0,3.6304
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,2048,1,0,4.9026
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,3072,1,0,7.6007
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,4096,1,0,10.6520
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,16,1,0,0.2078
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,32,1,0,0.2678
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,6144,1,0,16.6872
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,64,1,0,0.3881
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,16,8192,1,0,24.0508
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,128,1,0,0.6277
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,256,1,0,1.1770
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,512,1,0,2.3456
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,1024,1,0,4.6969
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,1536,1,0,7.1396
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,2048,1,0,9.8423
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,16,1,0,0.2864
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,3072,1,0,15.0015
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,32,1,0,0.4077
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,64,1,0,0.6476
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,32,4096,1,0,21.1117
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,128,1,0,1.1752
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,256,1,0,2.3253
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,512,1,0,4.5850
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,1024,1,0,9.4501
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,128,16,1,0,0.4456
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,128,32,1,0,0.6856
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,1536,1,0,14.0973
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,128,64,1,0,1.2110
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,64,2048,1,0,19.5335
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,128,128,1,0,2.3254
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,128,256,1,0,4.5501
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,128,512,1,0,9.2290
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,256,16,1,0,0.7599
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,256,32,1,0,1.2869
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,256,64,1,0,2.4015
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,128,1024,1,0,18.7420
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,256,128,1,0,4.5523
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,16,1,0,0.1255
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,256,256,1,0,9.1605
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,32,1,0,0.1268
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,64,1,0,0.1319
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,128,1,0,0.1361
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,32,256,512,1,0,18.2875
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,256,1,0,0.1466
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,512,1,0,0.1709
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,1024,1,0,0.2211
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,1536,1,0,0.2691
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,2048,1,0,0.3176
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,3072,1,0,0.4328
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,4096,1,0,0.5523
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,6144,1,0,0.8006
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,8192,1,0,1.0787
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,10240,1,0,1.3826
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,12288,1,0,1.7186
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,16384,1,0,2.4512
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,16,1,0,0.1282
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,1,32768,1,0,6.2241
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,32,1,0,0.1299
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,64,1,0,0.1323
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,128,1,0,0.1404
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,256,1,0,0.1698
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,512,1,0,0.2155
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,1024,1,0,0.3063
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,1536,1,0,0.4085
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,2048,1,0,0.5076
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,3072,1,0,0.7183
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,4096,1,0,0.9514
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,6144,1,0,1.4717
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,8192,1,0,2.0432
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,10240,1,0,2.6590
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,12288,1,0,3.3018
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,16384,1,0,4.7099
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,16,1,0,0.1306
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,32,1,0,0.1320
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,2,32768,1,0,12.0503
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,64,1,0,0.1445
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,128,1,0,0.1655
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,256,1,0,0.2125
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,512,1,0,0.2994
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,1024,1,0,0.4873
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,1536,1,0,0.6773
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,2048,1,0,0.8859
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,3072,1,0,1.3411
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,4096,1,0,1.8239
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,6144,1,0,2.8658
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,8192,1,0,3.9811
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,10240,1,0,5.1598
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,12288,1,0,6.4100
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,16384,1,0,9.1345
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,16,1,0,0.1342
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,32,1,0,0.1424
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,64,1,0,0.1688
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,4,32768,1,0,23.8264
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,128,1,0,0.2079
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,256,1,0,0.2974
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,512,1,0,0.4738
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,1024,1,0,0.8498
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,1536,1,0,1.2699
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,2048,1,0,1.7137
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,3072,1,0,2.6285
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,4096,1,0,3.5752
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,6144,1,0,5.5867
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,8192,1,0,7.7653
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,10240,1,0,10.0925
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,16,1,0,0.1446
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,12288,1,0,12.5308
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,32,1,0,0.1708
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,8,16384,1,0,18.1506
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,64,1,0,0.2110
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,128,1,0,0.2976
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,256,1,0,0.4663
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,512,1,0,0.8330
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,1024,1,0,1.6523
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,1536,1,0,2.5002
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,2048,1,0,3.3603
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,3072,1,0,5.1344
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,4096,1,0,6.9989
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,16,1,0,0.1769
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,6144,1,0,10.9611
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,32,1,0,0.2175
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,16,8192,1,0,15.4981
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,64,1,0,0.3026
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,128,1,0,0.4690
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,256,1,0,0.8241
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,512,1,0,1.6239
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,1024,1,0,3.2598
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,1536,1,0,4.9034
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,2048,1,0,6.5799
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,3072,1,0,10.0937
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,16,1,0,0.2263
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,32,1,0,0.3130
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,32,4096,1,0,14.0118
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,64,1,0,0.4769
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,128,1,0,0.8234
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,256,1,0,1.6084
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,512,1,0,3.1990
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,1024,1,0,6.3773
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,128,16,1,0,0.3301
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,1536,1,0,9.6445
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,128,32,1,0,0.4950
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,64,2048,1,0,13.2086
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,128,64,1,0,0.8399
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,128,128,1,0,1.6064
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,128,256,1,0,3.1841
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,128,512,1,0,6.2751
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,256,16,1,0,0.5342
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,256,32,1,0,0.8799
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,128,1024,1,0,12.7892
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,256,64,1,0,1.6448
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,256,128,1,0,3.1815
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,256,256,1,0,6.2392
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,16,1,0,0.1259
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,32,1,0,0.1259
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,64,1,0,0.1261
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,128,1,0,0.1321
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,16,256,512,1,0,12.5873
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,256,1,0,0.1402
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,512,1,0,0.1574
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,1024,1,0,0.1954
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,1536,1,0,0.2399
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,2048,1,0,0.2807
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,3072,1,0,0.3601
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,4096,1,0,0.4373
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,6144,1,0,0.6200
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,8192,1,0,0.8243
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,10240,1,0,1.0385
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,12288,1,0,1.2699
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,16384,1,0,1.7522
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,1,32768,1,0,4.1500
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,16,1,0,0.1264
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,32,1,0,0.1281
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,64,1,0,0.1299
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,128,1,0,0.1371
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,256,1,0,0.1545
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,512,1,0,0.1954
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,1536,1,0,0.3401
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,1024,1,0,0.2676
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,2048,1,0,0.4136
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,3072,1,0,0.5702
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,4096,1,0,0.7478
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,6144,1,0,1.1169
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,8192,1,0,1.5144
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,10240,1,0,1.9307
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,12288,1,0,2.3703
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,16384,1,0,3.3146
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,2,32768,1,0,7.9442
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,16,1,0,0.1278
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,32,1,0,0.1321
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,64,1,0,0.1368
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,128,1,0,0.1504
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,256,1,0,0.1933
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,512,1,0,0.2610
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,1024,1,0,0.3982
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,1536,1,0,0.5462
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,2048,1,0,0.7040
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,3072,1,0,1.0377
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,4096,1,0,1.3886
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,6144,1,0,2.1243
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,8192,1,0,2.9158
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,10240,1,0,3.7349
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,12288,1,0,4.5949
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,16384,1,0,6.4247
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,16,1,0,0.1317
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,32,1,0,0.1360
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,4,32768,1,0,15.4307
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,64,1,0,0.1527
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,128,1,0,0.1876
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,256,1,0,0.2593
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,512,1,0,0.3927
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,1024,1,0,0.6829
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,1536,1,0,0.9955
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,3072,1,0,1.9885
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,2048,1,0,1.3198
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,4096,1,0,2.6984
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,6144,1,0,4.1555
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,8192,1,0,5.6870
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,10240,1,0,7.2931
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,12288,1,0,8.9524
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,8,16384,1,0,12.5440
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,16,1,0,0.1374
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,32,1,0,0.1506
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,64,1,0,0.1889
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,128,1,0,0.2557
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,256,1,0,0.3897
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,512,1,0,0.6714
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,1024,1,0,1.2862
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,1536,1,0,1.9229
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,2048,1,0,2.5856
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,3072,1,0,3.9258
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,4096,1,0,5.2894
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,6144,1,0,8.1300
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,16,1,0,0.1607
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,16,8192,1,0,11.1576
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,32,1,0,0.1872
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,64,1,0,0.2589
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,128,1,0,0.3914
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,256,1,0,0.6606
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,512,1,0,1.2669
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,1024,1,0,2.5235
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,1536,1,0,3.8005
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,2048,1,0,5.0862
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,3072,1,0,7.6912
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,16,1,0,0.1970
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,32,4096,1,0,10.3981
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,32,1,0,0.2644
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,64,1,0,0.3949
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,128,1,0,0.6667
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,256,1,0,1.2621
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,512,1,0,2.4927
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,1024,1,0,4.9773
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,1536,1,0,7.4569
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,64,2048,1,0,9.9968
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,128,16,1,0,0.2732
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,128,32,1,0,0.4038
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,128,64,1,0,0.6720
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,128,128,1,0,1.2585
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,128,256,1,0,2.4855
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,128,512,1,0,4.9171
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,256,16,1,0,0.4232
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,256,32,1,0,0.6916
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,128,1024,1,0,9.7763
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,256,64,1,0,1.2776
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,256,128,1,0,2.4813
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,256,256,1,0,4.9038
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,16,1,0,0.1236
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,32,1,0,0.1240
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,64,1,0,0.1280
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,8,256,512,1,0,9.6775
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,128,1,0,0.1321
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,256,1,0,0.1385
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,512,1,0,0.1591
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,1024,1,0,0.1925
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,1536,1,0,0.2267
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,3072,1,0,0.3348
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,4096,1,0,0.4024
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,6144,1,0,0.5468
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,2048,1,0,0.2640
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,8192,1,0,0.6856
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,10240,1,0,0.8491
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,12288,1,0,1.0350
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,16384,1,0,1.4190
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,16,1,0,0.1258
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,1,32768,1,0,3.1132
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,32,1,0,0.1278
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,64,1,0,0.1285
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,128,1,0,0.1362
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,256,1,0,0.1525
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,512,1,0,0.1834
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,1024,1,0,0.2520
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,1536,1,0,0.3148
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,2048,1,0,0.3773
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,3072,1,0,0.5025
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,4096,1,0,0.6353
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,6144,1,0,0.9419
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,8192,1,0,1.2650
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,10240,1,0,1.6003
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,12288,1,0,1.9325
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,16384,1,0,2.6353
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,2,32768,1,0,5.8967
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,16,1,0,0.1261
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,32,1,0,0.1299
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,64,1,0,0.1341
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,128,1,0,0.1493
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,256,1,0,0.1853
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,512,1,0,0.2446
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,1024,1,0,0.3635
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,1536,1,0,0.4835
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,3072,1,0,0.8957
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,2048,1,0,0.6108
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,4096,1,0,1.1867
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,6144,1,0,1.7783
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,8192,1,0,2.3949
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,10240,1,0,3.0377
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,12288,1,0,3.6940
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,16,1,0,0.1284
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,16384,1,0,5.0697
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,32,1,0,0.1360
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,4,32768,1,0,11.4032
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,64,1,0,0.1488
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,128,1,0,0.1828
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,256,1,0,0.2408
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,512,1,0,0.3560
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,1024,1,0,0.5979
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,1536,1,0,0.8699
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,2048,1,0,1.1461
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,3072,1,0,1.6988
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,4096,1,0,2.2727
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,6144,1,0,3.4509
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,8192,1,0,4.6622
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,10240,1,0,5.9092
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,12288,1,0,7.1940
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,16,1,0,0.1361
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,32,1,0,0.1522
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,8,16384,1,0,9.8747
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,64,1,0,0.1841
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,128,1,0,0.2388
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,512,1,0,0.5919
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,256,1,0,0.3526
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,1024,1,0,1.1222
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,1536,1,0,1.6585
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,2048,1,0,2.2039
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,3072,1,0,3.3168
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,4096,1,0,4.4534
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,6144,1,0,6.7555
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,16,1,0,0.1456
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,32,1,0,0.1800
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,16,8192,1,0,9.1454
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,64,1,0,0.2398
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,128,1,0,0.3523
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,256,1,0,0.5929
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,512,1,0,1.1119
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,1024,1,0,2.1668
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,1536,1,0,3.2496
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,2048,1,0,4.3405
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,3072,1,0,6.5266
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,32,4096,1,0,8.7528
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,16,1,0,0.1851
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,32,1,0,0.2408
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,64,1,0,0.3539
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,128,1,0,0.5886
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,256,1,0,1.1063
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,512,1,0,2.1487
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,1024,1,0,4.2787
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,1536,1,0,6.4000
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,128,16,1,0,0.2448
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,64,2048,1,0,8.5424
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,128,32,1,0,0.3612
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,128,64,1,0,0.5937
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,128,128,1,0,1.1072
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,128,256,1,0,2.1434
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,128,512,1,0,4.2453
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,256,16,1,0,0.3692
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,128,1024,1,0,8.4313
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,256,32,1,0,0.6036
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,256,64,1,0,1.1145
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,256,128,1,0,2.1400
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,256,256,1,0,4.2369
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,16,1,0,0.1217
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,32,1,0,0.1258
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,4,256,512,1,0,8.3702
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,64,1,0,0.1265
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,128,1,0,0.1289
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,256,1,0,0.1384
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,512,1,0,0.1533
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,1024,1,0,0.1936
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,1536,1,0,0.2244
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,2048,1,0,0.2578
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,3072,1,0,0.3207
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,4096,1,0,0.3881
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,6144,1,0,0.5210
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,8192,1,0,0.6547
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,10240,1,0,0.7949
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,12288,1,0,0.9416
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,16384,1,0,1.2292
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,1,32768,1,0,2.6064
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,16,1,0,0.1239
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,64,1,0,0.1282
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,32,1,0,0.1260
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,128,1,0,0.1364
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,256,1,0,0.1513
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,512,1,0,0.1862
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,1024,1,0,0.2433
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,1536,1,0,0.3046
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,2048,1,0,0.3618
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,3072,1,0,0.4796
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,4096,1,0,0.6015
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,6144,1,0,0.8635
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,8192,1,0,1.1262
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,10240,1,0,1.4021
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,12288,1,0,1.6888
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,16384,1,0,2.3052
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,2,32768,1,0,4.8750
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,16,1,0,0.1238
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,32,1,0,0.1320
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,64,1,0,0.1343
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,128,1,0,0.1532
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,256,1,0,0.1804
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,512,1,0,0.2383
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,1024,1,0,0.3493
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,1536,1,0,0.4581
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,2048,1,0,0.5748
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,3072,1,0,0.8273
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,4096,1,0,1.0745
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,6144,1,0,1.5972
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,8192,1,0,2.1531
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,10240,1,0,2.7014
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,12288,1,0,3.2538
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,16384,1,0,4.3948
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,4,32768,1,0,9.3765
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,16,1,0,0.1283
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,32,1,0,0.1339
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,64,1,0,0.1462
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,128,1,0,0.1803
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,256,1,0,0.2343
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,512,1,0,0.3434
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,1024,1,0,0.5645
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,1536,1,0,0.8071
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,2048,1,0,1.0488
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,3072,1,0,1.5477
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,4096,1,0,2.0767
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,6144,1,0,3.1026
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,8192,1,0,4.1525
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,10240,1,0,5.2252
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,12288,1,0,6.3084
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,16,1,0,0.1339
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,32,1,0,0.1483
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,8,16384,1,0,8.5428
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,64,1,0,0.1790
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,128,1,0,0.2317
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,256,1,0,0.3398
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,512,1,0,0.5548
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,1024,1,0,1.0368
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,1536,1,0,1.5233
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,2048,1,0,2.0343
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,3072,1,0,3.0258
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,4096,1,0,4.0272
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,6144,1,0,6.0681
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,16,1,0,0.1508
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,32,1,0,0.1770
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,64,1,0,0.2333
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,128,1,0,0.3368
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,16,8192,1,0,8.1286
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,256,1,0,0.5524
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,512,1,0,1.0298
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,1024,1,0,2.0100
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,1536,1,0,2.9828
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,2048,1,0,3.9552
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,3072,1,0,5.9333
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,16,1,0,0.1735
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,32,4096,1,0,7.9225
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,32,1,0,0.2323
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,64,1,0,0.3370
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,128,1,0,0.5485
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,256,1,0,1.0261
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,512,1,0,1.9991
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,1024,1,0,3.9272
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,1536,1,0,5.8611
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,64,2048,1,0,7.8073
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,128,32,1,0,0.3404
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,128,128,1,0,1.0294
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,128,64,1,0,0.5517
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,128,256,1,0,1.9909
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,128,512,1,0,3.9052
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,128,1024,1,0,7.7414
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,256,16,1,0,0.3453
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,128,16,1,0,0.2354
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,256,32,1,0,0.5609
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,256,64,1,0,1.0350
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,256,128,1,0,1.9900
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,256,256,1,0,3.9022
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,2,256,512,1,0,7.7165
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,16,1,0,0.1157
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,32,1,0,0.1196
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,64,1,0,0.1225
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,128,1,0,0.1278
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,256,1,0,0.1331
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,512,1,0,0.1489
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,1024,1,0,0.1862
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,1536,1,0,0.2185
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,2048,1,0,0.2541
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,3072,1,0,0.3183
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,4096,1,0,0.3776
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,6144,1,0,0.5059
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,8192,1,0,0.6331
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,10240,1,0,0.7709
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,12288,1,0,0.9142
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,16384,1,0,1.1874
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,16,1,0,0.1201
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,32,1,0,0.1217
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,64,1,0,0.1242
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,1,32768,1,0,2.3053
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,128,1,0,0.1322
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,256,1,0,0.1463
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,512,1,0,0.1799
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,1024,1,0,0.2403
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,1536,1,0,0.2958
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,2048,1,0,0.3523
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,3072,1,0,0.4628
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,4096,1,0,0.5847
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,6144,1,0,0.8360
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,8192,1,0,1.0858
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,10240,1,0,1.3427
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,12288,1,0,1.5856
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,16384,1,0,2.1014
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,16,1,0,0.1258
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,2,32768,1,0,4.3389
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,32,1,0,0.1278
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,64,1,0,0.1340
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,128,1,0,0.1405
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,256,1,0,0.1778
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,512,1,0,0.2349
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,1536,1,0,0.4439
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,1024,1,0,0.3407
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,2048,1,0,0.5608
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,3072,1,0,0.7978
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,4096,1,0,1.0344
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,6144,1,0,1.5123
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,8192,1,0,1.9956
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,10240,1,0,2.4988
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,12288,1,0,3.0031
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,16384,1,0,4.0301
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,16,1,0,0.1279
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,4,32768,1,0,8.2992
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,32,1,0,0.1304
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,64,1,0,0.1439
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,128,1,0,0.1742
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,256,1,0,0.2303
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,512,1,0,0.3336
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,1024,1,0,0.5479
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,1536,1,0,0.7803
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,2048,1,0,1.0074
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,3072,1,0,1.4737
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,4096,1,0,1.9467
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,6144,1,0,2.9085
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,8192,1,0,3.8749
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,10240,1,0,4.8569
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,12288,1,0,5.8213
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,8,16384,1,0,7.8190
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,16,1,0,0.1308
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,32,1,0,0.1444
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,64,1,0,0.1730
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,128,1,0,0.2274
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,256,1,0,0.3311
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,512,1,0,0.5413
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,1024,1,0,0.9950
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,1536,1,0,1.4526
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,2048,1,0,1.9220
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,3072,1,0,2.8631
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,4096,1,0,3.7963
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,6144,1,0,5.6726
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,32,1,0,0.1704
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,16,1,0,0.1421
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,16,8192,1,0,7.5795
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,64,1,0,0.2269
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,128,1,0,0.3290
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,256,1,0,0.5326
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,512,1,0,0.9877
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,1024,1,0,1.9081
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,1536,1,0,2.8374
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,2048,1,0,3.7570
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,3072,1,0,5.5967
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,32,4096,1,0,7.4548
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,16,1,0,0.1708
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,32,1,0,0.2267
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,64,1,0,0.3281
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,128,1,0,0.5295
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,256,1,0,0.9856
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,512,1,0,1.9036
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,1024,1,0,3.7430
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,1536,1,0,5.5595
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,128,16,1,0,0.2283
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,64,2048,1,0,7.3883
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,128,32,1,0,0.3274
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,128,64,1,0,0.5304
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,128,128,1,0,0.9829
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,128,256,1,0,1.8989
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,128,512,1,0,3.7291
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,256,16,1,0,0.3300
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,128,1024,1,0,7.3482
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,256,32,1,0,0.5376
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,256,64,1,0,0.9870
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,256,128,1,0,1.8996
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,256,256,1,0,3.7130
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,16,1,0,0.1341
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,fp8_block,1,256,512,1,0,7.3353
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,32,1,0,0.1380
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,64,1,0,0.1441
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,128,1,0,0.1572
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,256,1,0,0.1773
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,512,1,0,0.2446
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1024,1,0,0.3949
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,1536,1,0,0.5470
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,2048,1,0,0.7294
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,3072,1,0,1.1489
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,4096,1,0,1.6140
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,6144,1,0,2.6407
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,8192,1,0,3.8353
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,10240,1,0,5.2011
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,12288,1,0,6.7161
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,16384,1,0,10.4325
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,16,1,0,0.1445
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,32,1,0,0.1466
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,64,1,0,0.1584
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,128,1,0,0.1781
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,256,1,0,0.2402
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,512,1,0,0.3720
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,1,32768,1,0,31.7474
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1024,1,0,0.6713
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,1536,1,0,1.0251
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,2048,1,0,1.3983
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,3072,1,0,2.1864
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,4096,1,0,3.0701
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,6144,1,0,5.1237
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,8192,1,0,7.7299
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,10240,1,0,10.4980
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,12288,1,0,13.2588
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,16384,1,0,20.6272
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,16,1,0,0.1523
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,32,1,0,0.1628
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,64,1,0,0.1837
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,128,1,0,0.2394
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,256,1,0,0.3650
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,512,1,0,0.6404
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1024,1,0,1.2876
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,1536,1,0,1.9535
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,2048,1,0,2.6655
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,2,32768,1,0,63.0324
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,3072,1,0,4.2386
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,4096,1,0,6.2295
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,6144,1,0,10.1296
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,8192,1,0,15.2917
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,10240,1,0,20.7923
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,16,1,0,0.1729
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,32,1,0,0.1933
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,12288,1,0,26.9553
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,64,1,0,0.2489
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,128,1,0,0.3633
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,16384,1,0,40.9832
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,256,1,0,0.6262
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,512,1,0,1.2349
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1024,1,0,2.4542
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,1536,1,0,3.7929
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,2048,1,0,5.4253
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,3072,1,0,8.3904
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,4096,1,0,12.3499
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,6144,1,0,20.7120
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,8192,1,0,30.4333
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,16,1,0,0.2115
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,10240,1,0,41.4208
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,32,1,0,0.2666
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,4,32768,1,0,122.7561
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,64,1,0,0.3813
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,128,1,0,0.6272
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,256,1,0,1.2126
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,12288,1,0,51.4924
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,512,1,0,2.3461
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1024,1,0,5.0238
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,1536,1,0,7.5000
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,2048,1,0,10.7511
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,8,16384,1,0,78.6782
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,3072,1,0,17.2836
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,4096,1,0,24.6198
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,16,1,0,0.3034
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,32,1,0,0.4189
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,64,1,0,0.6633
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,128,1,0,1.2159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,6144,1,0,39.0737
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,256,1,0,2.3070
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,512,1,0,4.8051
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1024,1,0,9.9441
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,16,8192,1,0,57.7473
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,1536,1,0,15.5084
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,2048,1,0,21.3979
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,16,1,0,0.4969
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,32,1,0,0.7397
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,64,1,0,1.2899
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,3072,1,0,32.1854
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,128,1,0,2.3158
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,256,1,0,4.7358
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,32,4096,1,0,46.2240
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,512,1,0,9.5250
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,16,1,0,0.8913
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1024,1,0,19.8179
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,32,1,0,1.4402
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,64,1,0,2.4681
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,1536,1,0,28.6667
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,128,1,0,4.7450
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,64,2048,1,0,39.7818
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,256,1,0,9.3968
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,512,1,0,18.9901
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,16,1,0,1.7460
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,32,1,0,2.7683
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,64,1,0,5.0498
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,128,1024,1,0,36.6015
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,128,1,0,9.4066
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,16,1,0,0.1174
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,32,1,0,0.1160
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,256,1,0,18.7426
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,64,1,0,0.1220
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,128,1,0,0.1267
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,256,1,0,0.1462
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,128,256,512,1,0,34.9417
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,512,1,0,0.1868
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1024,1,0,0.2805
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,1536,1,0,0.3733
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,2048,1,0,0.4809
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,3072,1,0,0.7101
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,4096,1,0,0.9736
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,6144,1,0,1.5859
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,8192,1,0,2.2790
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,10240,1,0,3.0436
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,12288,1,0,3.8848
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,16384,1,0,5.8056
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,1,32768,1,0,17.2157
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,32,1,0,0.1202
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,16,1,0,0.1166
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,64,1,0,0.1278
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,128,1,0,0.1440
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,256,1,0,0.1835
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,512,1,0,0.2679
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1024,1,0,0.4448
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,1536,1,0,0.6413
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,2048,1,0,0.8581
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,3072,1,0,1.3443
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,4096,1,0,1.8816
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,6144,1,0,3.0620
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,8192,1,0,4.4240
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,10240,1,0,5.9335
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,12288,1,0,7.5893
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,16,1,0,0.1247
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,16384,1,0,11.5823
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,32,1,0,0.1319
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,64,1,0,0.1483
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,128,1,0,0.1843
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,256,1,0,0.2624
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,512,1,0,0.4247
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,2,32768,1,0,34.0407
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1024,1,0,0.7989
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,1536,1,0,1.2228
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,2048,1,0,1.6645
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,3072,1,0,2.6079
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,4096,1,0,3.6592
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,6144,1,0,5.9964
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,8192,1,0,8.8836
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,10240,1,0,11.9323
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,16,1,0,0.1359
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,12288,1,0,15.0006
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,32,1,0,0.1534
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,16384,1,0,22.9286
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,64,1,0,0.1892
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,128,1,0,0.2650
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,256,1,0,0.4186
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,512,1,0,0.7683
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1024,1,0,1.5557
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,1536,1,0,2.3719
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,2048,1,0,3.2472
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,3072,1,0,5.1185
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,4096,1,0,7.3992
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,4,32768,1,0,67.6443
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,6144,1,0,11.8631
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,8192,1,0,17.6310
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,10240,1,0,23.6742
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,16,1,0,0.1629
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,32,1,0,0.1985
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,64,1,0,0.2708
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,12288,1,0,30.3545
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,128,1,0,0.4168
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,256,1,0,0.7549
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,512,1,0,1.4979
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1024,1,0,3.0364
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,8,16384,1,0,45.5706
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,1536,1,0,4.6676
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,2048,1,0,6.5886
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,3072,1,0,10.1484
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,4096,1,0,14.6904
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,16,1,0,0.2179
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,32,1,0,0.2922
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,64,1,0,0.4349
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,6144,1,0,24.1475
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,128,1,0,0.7566
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,256,1,0,1.4794
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,16,8192,1,0,35.0089
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,512,1,0,2.9283
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1024,1,0,6.1851
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,1536,1,0,9.2529
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,2048,1,0,13.0867
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,16,1,0,0.3284
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,32,1,0,0.4731
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,64,1,0,0.7917
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,3072,1,0,20.7414
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,128,1,0,1.4812
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,32,4096,1,0,29.1688
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,256,1,0,2.8876
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,512,1,0,5.9726
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,16,1,0,0.5506
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1024,1,0,12.2833
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,32,1,0,0.8668
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,1536,1,0,18.9838
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,64,1,0,1.5539
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,128,1,0,2.8976
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,64,2048,1,0,26.0328
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,256,1,0,5.8887
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,512,1,0,11.8574
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,16,1,0,1.0189
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,32,1,0,1.7039
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,64,1,0,3.0489
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,128,1024,1,0,24.4094
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,128,1,0,5.9130
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,256,1,0,11.7023
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,16,1,0,0.1094
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,32,1,0,0.1112
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,64,1,0,0.1141
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,128,1,0,0.1176
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,256,1,0,0.1258
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,64,256,512,1,0,23.5669
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,512,1,0,0.1529
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1536,1,0,0.2662
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,1024,1,0,0.2064
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,2048,1,0,0.3296
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,3072,1,0,0.4628
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,4096,1,0,0.6068
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,6144,1,0,0.9450
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,8192,1,0,1.3420
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,12288,1,0,2.2646
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,10240,1,0,1.7854
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,16384,1,0,3.3429
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,16,1,0,0.1134
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,1,32768,1,0,9.3467
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,32,1,0,0.1135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,64,1,0,0.1179
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,128,1,0,0.1221
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,512,1,0,0.1999
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,256,1,0,0.1490
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1024,1,0,0.3075
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,2048,1,0,0.5363
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,1536,1,0,0.4217
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,3072,1,0,0.8092
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,4096,1,0,1.1224
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,6144,1,0,1.8260
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,8192,1,0,2.6120
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,10240,1,0,3.4658
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,12288,1,0,4.3821
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,16,1,0,0.1137
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,16384,1,0,6.4600
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,2,32768,1,0,18.5287
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,64,1,0,0.1238
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,128,1,0,0.1464
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,256,1,0,0.1974
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,32,1,0,0.1176
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,512,1,0,0.2956
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1024,1,0,0.5035
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,1536,1,0,0.7391
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,2048,1,0,1.0038
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,3072,1,0,1.5888
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,4096,1,0,2.2116
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,6144,1,0,3.5616
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,8192,1,0,5.0704
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,10240,1,0,6.7555
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,12288,1,0,8.5783
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,16384,1,0,12.8923
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,16,1,0,0.1199
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,32,1,0,0.1257
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,64,1,0,0.1504
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,128,1,0,0.1996
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,4,32768,1,0,36.6116
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,512,1,0,0.4847
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,256,1,0,0.2903
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1024,1,0,0.9442
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,1536,1,0,1.4625
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,2048,1,0,1.9932
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,3072,1,0,3.1059
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,4096,1,0,4.3174
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,6144,1,0,6.9918
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,16,1,0,0.1302
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,8192,1,0,10.1964
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,10240,1,0,13.5674
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,32,1,0,0.1560
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,12288,1,0,16.9370
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,8,16384,1,0,25.4724
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,64,1,0,0.2040
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,128,1,0,0.2923
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,256,1,0,0.4773
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,512,1,0,0.9122
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1024,1,0,1.8866
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,1536,1,0,2.8746
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,2048,1,0,3.9020
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,3072,1,0,6.1132
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,4096,1,0,8.7112
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,16,1,0,0.1646
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,6144,1,0,13.8026
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,32,1,0,0.2139
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,64,1,0,0.2988
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,16,8192,1,0,20.1642
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,128,1,0,0.4748
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,256,1,0,0.9004
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,512,1,0,1.8295
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1024,1,0,3.6954
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,1536,1,0,5.6631
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,2048,1,0,7.9038
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,3072,1,0,12.0863
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,16,1,0,0.2317
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,32,1,0,0.3186
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,32,4096,1,0,17.2260
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,64,1,0,0.4944
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,128,1,0,0.9034
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,256,1,0,1.8052
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,512,1,0,3.5847
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1024,1,0,7.4996
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,16,1,0,0.3573
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,1536,1,0,11.1762
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,32,1,0,0.5328
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,64,2048,1,0,15.6364
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,64,1,0,0.9355
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,128,1,0,1.8119
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,256,1,0,3.5415
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,512,1,0,7.2834
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,16,1,0,0.6097
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,32,1,0,1.0126
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,64,1,0,1.8846
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,128,1024,1,0,14.8388
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,128,1,0,3.5540
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,256,1,0,7.2023
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,16,1,0,0.1074
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,32,1,0,0.1116
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,64,1,0,0.1103
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,128,1,0,0.1115
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,32,256,512,1,0,14.4092
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,256,1,0,0.1203
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,512,1,0,0.1320
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1024,1,0,0.1751
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,1536,1,0,0.2122
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,3072,1,0,0.3406
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,2048,1,0,0.2486
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,4096,1,0,0.4342
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,6144,1,0,0.6356
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,10240,1,0,1.1341
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,8192,1,0,0.8716
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,12288,1,0,1.4232
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,16384,1,0,2.0785
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,16,1,0,0.1114
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,1,32768,1,0,5.5174
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,32,1,0,0.1099
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,64,1,0,0.1135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,128,1,0,0.1176
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,256,1,0,0.1281
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,512,1,0,0.1688
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1024,1,0,0.2356
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,1536,1,0,0.3146
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,2048,1,0,0.3915
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,3072,1,0,0.5552
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,4096,1,0,0.7432
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,6144,1,0,1.1767
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,8192,1,0,1.6733
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,10240,1,0,2.2080
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,12288,1,0,2.7624
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,16384,1,0,3.9989
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,2,32768,1,0,10.6802
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,16,1,0,0.1117
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,32,1,0,0.1098
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,128,1,0,0.1278
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,64,1,0,0.1166
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,256,1,0,0.1647
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,512,1,0,0.2292
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1024,1,0,0.3694
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,1536,1,0,0.5133
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,2048,1,0,0.6774
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,3072,1,0,1.0441
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,4096,1,0,1.4532
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,6144,1,0,2.3285
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,8192,1,0,3.2653
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,10240,1,0,4.2845
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,12288,1,0,5.3764
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,16,1,0,0.1116
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,16384,1,0,7.7835
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,32,1,0,0.1159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,64,1,0,0.1257
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,128,1,0,0.1627
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,4,32768,1,0,21.1165
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,256,1,0,0.2268
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,512,1,0,0.3572
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1024,1,0,0.6417
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,1536,1,0,0.9761
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,2048,1,0,1.3367
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,3072,1,0,2.0879
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,4096,1,0,2.8694
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,6144,1,0,4.5683
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,8192,1,0,6.3969
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,10240,1,0,8.3817
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,16,1,0,0.1186
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,12288,1,0,10.5226
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,32,1,0,0.1300
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,8,16384,1,0,15.4699
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,64,1,0,0.1654
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,128,1,0,0.2290
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,256,1,0,0.3494
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1024,1,0,1.2773
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,512,1,0,0.6225
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,1536,1,0,1.9710
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,2048,1,0,2.6554
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,3072,1,0,4.1073
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,4096,1,0,5.6399
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,16,1,0,0.1356
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,6144,1,0,8.9332
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,32,1,0,0.1711
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,64,1,0,0.2326
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,16,8192,1,0,12.7597
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,128,1,0,0.3516
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,256,1,0,0.6166
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,512,1,0,1.2480
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1024,1,0,2.5455
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,1536,1,0,3.8717
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,2048,1,0,5.2304
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,3072,1,0,8.0607
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,16,1,0,0.1808
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,32,4096,1,0,11.2848
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,32,1,0,0.2410
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,64,1,0,0.3610
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,128,1,0,0.6154
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,256,1,0,1.2354
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,512,1,0,2.4912
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1024,1,0,5.0195
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,1536,1,0,7.6074
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,16,1,0,0.2609
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,64,2048,1,0,10.4888
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,32,1,0,0.3798
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,64,1,0,0.6322
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,128,1,0,1.2363
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,256,1,0,2.4660
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,512,1,0,4.9062
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,16,1,0,0.4183
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,128,1024,1,0,10.0761
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,64,1,0,1.2732
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,32,1,0,0.6715
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,256,1,0,4.8699
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,128,1,0,2.4714
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,16,256,512,1,0,9.8587
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,32,1,0,0.1074
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,16,1,0,0.1082
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,64,1,0,0.1098
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,256,1,0,0.1177
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1024,1,0,0.1590
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,512,1,0,0.1279
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,1536,1,0,0.1891
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,2048,1,0,0.2194
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,4096,1,0,0.3384
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,3072,1,0,0.2814
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,6144,1,0,0.4817
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,8192,1,0,0.6488
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,10240,1,0,0.8342
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,12288,1,0,1.0237
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,16384,1,0,1.4335
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,128,1,0,0.1124
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,1,32768,1,0,3.5580
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,16,1,0,0.1074
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,32,1,0,0.1095
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,64,1,0,0.1135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,128,1,0,0.1155
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,256,1,0,0.1230
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1024,1,0,0.2080
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,2048,1,0,0.3139
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,3072,1,0,0.4345
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,1536,1,0,0.2624
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,4096,1,0,0.5732
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,6144,1,0,0.8702
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,8192,1,0,1.1923
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,10240,1,0,1.5437
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,12288,1,0,1.9184
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,16384,1,0,2.7287
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,16,1,0,0.1095
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,32,1,0,0.1096
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,32768,1,0,6.8257
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,2,512,1,0,0.1525
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,64,1,0,0.1138
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,128,1,0,0.1218
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,256,1,0,0.1503
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,512,1,0,0.2013
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1024,1,0,0.2993
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,1536,1,0,0.4078
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,2048,1,0,0.5306
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,4096,1,0,1.0660
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,6144,1,0,1.6687
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,8192,1,0,2.3218
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,10240,1,0,3.0248
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,12288,1,0,3.7499
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,16384,1,0,5.3036
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,32768,1,0,13.2158
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,4,3072,1,0,0.7908
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,16,1,0,0.1117
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,32,1,0,0.1155
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,64,1,0,0.1217
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,128,1,0,0.1463
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,512,1,0,0.2924
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,256,1,0,0.1979
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1024,1,0,0.5086
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,2048,1,0,1.0011
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,3072,1,0,1.5377
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,4096,1,0,2.1061
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,6144,1,0,3.3124
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,8192,1,0,4.5646
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,10240,1,0,5.8938
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,1536,1,0,0.7504
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,12288,1,0,7.2858
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,8,16384,1,0,10.3198
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,16,1,0,0.1156
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,32,1,0,0.1216
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,64,1,0,0.1452
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,128,1,0,0.1954
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,512,1,0,0.4972
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1024,1,0,0.9659
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,1536,1,0,1.4680
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,2048,1,0,1.9902
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,3072,1,0,3.0759
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,4096,1,0,4.1715
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,6144,1,0,6.4687
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,16,1,0,0.1244
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,8192,1,0,8.9232
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,32,1,0,0.1484
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,64,1,0,0.1982
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,16,256,1,0,0.2898
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,128,1,0,0.2897
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,256,1,0,0.4877
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,512,1,0,0.9474
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1024,1,0,1.9307
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,1536,1,0,2.9498
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,2048,1,0,3.9623
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,3072,1,0,6.0195
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,16,1,0,0.1555
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,32,1,0,0.2038
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,64,1,0,0.2971
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,128,1,0,0.4909
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,256,1,0,0.9389
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,512,1,0,1.9000
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,32,4096,1,0,8.1765
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1024,1,0,3.8517
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,1536,1,0,5.7935
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,64,2048,1,0,7.7759
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,16,1,0,0.2121
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,32,1,0,0.3047
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,64,1,0,0.4971
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,256,1,0,1.8882
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,512,1,0,3.7963
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,128,1,0,0.9393
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,16,1,0,0.3253
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,128,1024,1,0,7.5586
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,32,1,0,0.5170
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,64,1,0,0.9579
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,128,1,0,1.8901
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,256,1,0,3.7748
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,16,1,0,0.0994
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,64,1,0,0.1105
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,32,1,0,0.1036
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,128,1,0,0.1118
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,256,1,0,0.1170
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,8,256,512,1,0,7.4483
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,512,1,0,0.1259
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1024,1,0,0.1545
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,1536,1,0,0.1794
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,2048,1,0,0.2080
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,3072,1,0,0.2618
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,4096,1,0,0.3119
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,6144,1,0,0.4167
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,8192,1,0,0.5319
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,10240,1,0,0.6625
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,16384,1,0,1.1365
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,12288,1,0,0.8154
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,1,32768,1,0,2.5768
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,16,1,0,0.1074
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,32,1,0,0.1094
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,64,1,0,0.1118
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,128,1,0,0.1156
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,256,1,0,0.1238
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1024,1,0,0.1951
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,512,1,0,0.1463
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,2048,1,0,0.2862
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,3072,1,0,0.3785
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,1536,1,0,0.2423
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,4096,1,0,0.4822
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,6144,1,0,0.7194
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,8192,1,0,0.9835
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,10240,1,0,1.2572
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,12288,1,0,1.5301
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,16384,1,0,2.0995
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,2,32768,1,0,4.9042
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,16,1,0,0.1096
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,32,1,0,0.1117
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,64,1,0,0.1139
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,128,1,0,0.1217
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,256,1,0,0.1424
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,512,1,0,0.1882
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1024,1,0,0.2726
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,1536,1,0,0.3602
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,2048,1,0,0.4561
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,3072,1,0,0.6717
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,4096,1,0,0.9063
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,6144,1,0,1.3757
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,8192,1,0,1.8593
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,10240,1,0,2.3842
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,12288,1,0,2.9258
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,16384,1,0,4.0728
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,32,1,0,0.1135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,16,1,0,0.1118
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,4,32768,1,0,9.4344
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,64,1,0,0.1201
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,128,1,0,0.1415
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,256,1,0,0.1843
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,512,1,0,0.2671
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1024,1,0,0.4426
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,1536,1,0,0.6477
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,2048,1,0,0.8620
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,3072,1,0,1.2948
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,4096,1,0,1.7344
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,6144,1,0,2.6864
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,8192,1,0,3.6684
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,10240,1,0,4.6862
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,12288,1,0,5.7307
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,16,1,0,0.1137
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,32,1,0,0.1199
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,8,16384,1,0,7.9277
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,64,1,0,0.1410
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,128,1,0,0.1813
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,256,1,0,0.2641
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,512,1,0,0.4370
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1024,1,0,0.8399
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,1536,1,0,1.2535
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,2048,1,0,1.6666
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,3072,1,0,2.5554
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,4096,1,0,3.4535
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,6144,1,0,5.2855
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,16,8192,1,0,7.1883
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,16,1,0,0.1199
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,32,1,0,0.1407
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,64,1,0,0.1814
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,128,1,0,0.2632
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,256,1,0,0.4324
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,512,1,0,0.8282
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1024,1,0,1.6302
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,1536,1,0,2.4775
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,2048,1,0,3.3364
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,3072,1,0,5.0541
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,16,1,0,0.1442
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,32,4096,1,0,6.7966
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,32,1,0,0.1848
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,64,1,0,0.2652
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,128,1,0,0.4339
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,256,1,0,0.8221
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,512,1,0,1.6130
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1536,1,0,4.9349
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,1024,1,0,3.2795
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,64,2048,1,0,6.5820
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,16,1,0,0.1904
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,32,1,0,0.2712
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,64,1,0,0.4414
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,128,1,0,0.8247
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,256,1,0,1.6025
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,512,1,0,3.2476
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,128,1024,1,0,6.4728
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,16,1,0,0.2781
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,32,1,0,0.4477
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,64,1,0,0.8315
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,128,1,0,1.6081
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,256,1,0,3.2364
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,16,1,0,0.1032
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,4,256,512,1,0,6.4190
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,32,1,0,0.1052
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,64,1,0,0.1114
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,128,1,0,0.1128
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,256,1,0,0.1180
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,512,1,0,0.1241
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1024,1,0,0.1525
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,1536,1,0,0.1779
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,2048,1,0,0.2033
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,3072,1,0,0.2542
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,4096,1,0,0.3002
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,6144,1,0,0.4001
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,8192,1,0,0.5079
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,10240,1,0,0.6205
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,12288,1,0,0.7334
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,16384,1,0,0.9639
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,1,32768,1,0,2.1060
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,16,1,0,0.1094
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,32,1,0,0.1107
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,64,1,0,0.1117
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,128,1,0,0.1134
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,256,1,0,0.1227
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,512,1,0,0.1470
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1024,1,0,0.1898
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,1536,1,0,0.2364
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,2048,1,0,0.2747
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,3072,1,0,0.3622
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,4096,1,0,0.4561
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,6144,1,0,0.6557
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,8192,1,0,0.8617
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,10240,1,0,1.0851
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,12288,1,0,1.3197
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,16384,1,0,1.8017
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,2,32768,1,0,3.9184
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,16,1,0,0.1096
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,32,1,0,0.1075
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,64,1,0,0.1121
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,128,1,0,0.1197
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,256,1,0,0.1429
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,512,1,0,0.1834
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1024,1,0,0.2631
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,1536,1,0,0.3418
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,2048,1,0,0.4303
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,3072,1,0,0.6171
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,4096,1,0,0.8101
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,6144,1,0,1.2251
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,8192,1,0,1.6483
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,10240,1,0,2.0900
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,12288,1,0,2.5356
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,16384,1,0,3.4328
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,4,32768,1,0,7.5139
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,16,1,0,0.1094
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,32,1,0,0.1138
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,64,1,0,0.1200
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,128,1,0,0.1399
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,256,1,0,0.1813
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,512,1,0,0.2552
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1024,1,0,0.4159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,1536,1,0,0.5988
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,3072,1,0,1.1769
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,2048,1,0,0.7839
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,4096,1,0,1.5688
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,6144,1,0,2.3804
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,8192,1,0,3.1919
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,10240,1,0,4.0351
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,12288,1,0,4.8989
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,8,16384,1,0,6.6846
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,16,1,0,0.1138
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,32,1,0,0.1195
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,64,1,0,0.1381
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,128,1,0,0.1774
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,256,1,0,0.2535
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,512,1,0,0.4099
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1024,1,0,0.7719
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,1536,1,0,1.1513
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,2048,1,0,1.5281
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,3072,1,0,2.2965
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,4096,1,0,3.0695
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,6144,1,0,4.6453
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,16,8192,1,0,6.2812
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,16,1,0,0.1183
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,32,1,0,0.1385
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,64,1,0,0.1777
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,128,1,0,0.2499
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,256,1,0,0.4066
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,512,1,0,0.7634
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1024,1,0,1.5045
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,1536,1,0,2.2595
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,2048,1,0,3.0009
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,3072,1,0,4.5164
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,32,4096,1,0,6.0606
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,16,1,0,0.1392
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,32,1,0,0.1776
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,64,1,0,0.2491
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,128,1,0,0.4059
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,256,1,0,0.7619
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,512,1,0,1.4931
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1024,1,0,2.9675
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,1536,1,0,4.4456
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,16,1,0,0.1811
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,64,2048,1,0,5.9484
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,32,1,0,0.2538
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,64,1,0,0.4086
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,128,1,0,0.7618
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,256,1,0,1.4845
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,512,1,0,2.9495
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,16,1,0,0.2577
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,128,1024,1,0,5.8855
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,32,1,0,0.4138
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,64,1,0,0.7684
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,128,1,0,1.4902
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,256,1,0,2.9396
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,16,1,0,0.0973
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,32,1,0,0.1033
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,2,256,512,1,0,5.8569
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,64,1,0,0.1077
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,128,1,0,0.1097
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,256,1,0,0.1137
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,512,1,0,0.1241
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1024,1,0,0.1484
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,1536,1,0,0.1751
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,2048,1,0,0.1987
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,3072,1,0,0.2511
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,4096,1,0,0.2956
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,6144,1,0,0.3919
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,8192,1,0,0.4953
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,10240,1,0,0.6073
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,12288,1,0,0.7126
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,16384,1,0,0.9389
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,1,32768,1,0,1.8368
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,16,1,0,0.1056
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,32,1,0,0.1056
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,64,1,0,0.1053
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,128,1,0,0.1135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,256,1,0,0.1217
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,512,1,0,0.1424
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1024,1,0,0.1863
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,1536,1,0,0.2307
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,2048,1,0,0.2706
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,3072,1,0,0.3537
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,4096,1,0,0.4454
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,6144,1,0,0.6398
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,8192,1,0,0.8360
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,10240,1,0,1.0385
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,12288,1,0,1.2385
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,16384,1,0,1.6362
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,2,32768,1,0,3.4441
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,16,1,0,0.1013
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,32,1,0,0.1094
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,64,1,0,0.1115
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,128,1,0,0.1160
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,256,1,0,0.1401
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,512,1,0,0.1792
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1024,1,0,0.2578
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,1536,1,0,0.3344
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,2048,1,0,0.4182
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,3072,1,0,0.6008
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,4096,1,0,0.7854
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,6144,1,0,1.1598
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,8192,1,0,1.5303
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,10240,1,0,1.9259
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,12288,1,0,2.3306
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,16384,1,0,3.1446
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,16,1,0,0.1091
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,4,32768,1,0,6.5453
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,32,1,0,0.1097
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,64,1,0,0.1177
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,128,1,0,0.1380
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,256,1,0,0.1772
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,512,1,0,0.2514
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1536,1,0,0.5813
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,1024,1,0,0.4079
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,2048,1,0,0.7600
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,3072,1,0,1.1204
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,4096,1,0,1.4779
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,6144,1,0,2.2405
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,8192,1,0,2.9966
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,10240,1,0,3.7510
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,12288,1,0,4.5050
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,16,1,0,0.1100
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,8,16384,1,0,6.0604
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,32,1,0,0.1176
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,64,1,0,0.1370
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,128,1,0,0.1749
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,256,1,0,0.2483
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,512,1,0,0.3996
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1024,1,0,0.7465
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,1536,1,0,1.1008
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,2048,1,0,1.4577
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,3072,1,0,2.1907
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,4096,1,0,2.9144
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,6144,1,0,4.3539
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,16,8192,1,0,5.8154
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,16,1,0,0.1158
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,64,1,0,0.1732
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,32,1,0,0.1361
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,128,1,0,0.2447
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,256,1,0,0.3978
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,512,1,0,0.7407
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1024,1,0,1.4421
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,1536,1,0,2.1683
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,2048,1,0,2.8671
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,3072,1,0,4.2789
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,32,4096,1,0,5.6925
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,16,1,0,0.1361
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,32,1,0,0.1747
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,64,1,0,0.2448
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,128,1,0,0.3949
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,256,1,0,0.7365
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,512,1,0,1.4342
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1024,1,0,2.8448
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,1536,1,0,4.2352
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,64,2048,1,0,5.6309
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,16,1,0,0.1742
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,32,1,0,0.2466
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,64,1,0,0.3944
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,128,1,0,0.7348
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,256,1,0,1.4326
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,512,1,0,2.8353
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,16,1,0,0.2484
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,128,1024,1,0,5.5943
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,32,1,0,0.3963
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,64,1,0,0.7385
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,128,1,0,1.4345
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,256,1,0,2.8282
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,float16,float16,nvfp4,1,256,512,1,0,5.5723
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,32,1,0,0.1394
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,16,1,0,0.1369
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,128,1,0,0.1547
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,64,1,0,0.1462
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,256,1,0,0.1769
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,512,1,0,0.2462
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,1024,1,0,0.3916
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,1536,1,0,0.5504
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,2048,1,0,0.7311
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,3072,1,0,1.1515
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,4096,1,0,1.6135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,6144,1,0,2.6424
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,8192,1,0,3.8447
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,10240,1,0,5.2079
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,12288,1,0,6.7305
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,16,1,0,0.1403
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,16384,1,0,10.4284
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,32,1,0,0.1484
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,64,1,0,0.1603
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,128,1,0,0.1787
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,256,1,0,0.2403
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,1,32768,1,0,31.7481
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,512,1,0,0.3724
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,1024,1,0,0.6693
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,1536,1,0,1.0269
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,2048,1,0,1.4032
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,3072,1,0,2.1898
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,4096,1,0,3.0820
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,6144,1,0,5.1282
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,8192,1,0,7.7438
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,10240,1,0,10.5162
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,12288,1,0,13.2814
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,16,1,0,0.1538
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,32,1,0,0.1654
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,16384,1,0,20.6222
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,64,1,0,0.1838
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,128,1,0,0.2390
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,256,1,0,0.3672
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,512,1,0,0.6399
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,1024,1,0,1.2904
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,1536,1,0,1.9534
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,2048,1,0,2.6689
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,3072,1,0,4.2540
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,2,32768,1,0,63.0642
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,4096,1,0,6.2557
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,6144,1,0,10.1393
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,8192,1,0,15.3139
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,10240,1,0,20.8084
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,16,1,0,0.1730
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,32,1,0,0.1940
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,12288,1,0,26.9301
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,64,1,0,0.2468
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,128,1,0,0.3656
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,16384,1,0,41.0081
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,256,1,0,0.6275
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,512,1,0,1.2404
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,1024,1,0,2.4574
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,1536,1,0,3.7912
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,2048,1,0,5.4315
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,3072,1,0,8.4146
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,4096,1,0,12.3677
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,6144,1,0,20.7191
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,8192,1,0,30.5196
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,16,1,0,0.2118
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,32,1,0,0.2673
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,10240,1,0,41.4402
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,4,32768,1,0,122.8861
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,128,1,0,0.6267
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,64,1,0,0.3825
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,12288,1,0,51.4268
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,256,1,0,1.2150
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,512,1,0,2.3551
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,1024,1,0,5.0348
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,1536,1,0,7.5170
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,8,16384,1,0,78.8223
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,2048,1,0,10.7874
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,3072,1,0,17.3258
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,4096,1,0,24.6254
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,16,1,0,0.3047
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,32,1,0,0.4201
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,64,1,0,0.6657
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,6144,1,0,39.0523
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,128,1,0,1.2177
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,256,1,0,2.3096
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,512,1,0,4.8227
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,1024,1,0,9.9580
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,16,8192,1,0,57.8593
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,1536,1,0,15.5125
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,2048,1,0,21.4429
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,16,1,0,0.4980
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,32,1,0,0.7396
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,64,1,0,1.2946
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,3072,1,0,32.2246
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,128,1,0,2.3111
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,256,1,0,4.7440
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,32,4096,1,0,46.2427
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,512,1,0,9.5656
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,128,16,1,0,0.8951
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,1024,1,0,19.8723
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,128,32,1,0,1.4420
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,1536,1,0,28.7005
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,128,64,1,0,2.4723
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,128,128,1,0,4.7584
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,128,256,1,0,9.4043
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,64,2048,1,0,39.8893
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,256,16,1,0,1.7496
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,256,32,1,0,2.7749
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,128,512,1,0,19.0206
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,256,64,1,0,5.0645
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,128,1024,1,0,36.6719
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,256,128,1,0,9.4235
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,16,1,0,0.1138
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,32,1,0,0.1201
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,256,256,1,0,18.7345
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,64,1,0,0.1206
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,128,1,0,0.1285
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,128,256,512,1,0,35.0053
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,256,1,0,0.1462
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,512,1,0,0.1868
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,1536,1,0,0.3754
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,2048,1,0,0.4816
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,3072,1,0,0.7131
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,1024,1,0,0.2810
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,4096,1,0,0.9778
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,6144,1,0,1.5928
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,8192,1,0,2.2882
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,10240,1,0,3.0465
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,12288,1,0,3.8914
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,16384,1,0,5.8128
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,16,1,0,0.1198
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,1,32768,1,0,17.2159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,32,1,0,0.1218
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,64,1,0,0.1300
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,128,1,0,0.1445
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,256,1,0,0.1837
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,512,1,0,0.2685
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,1024,1,0,0.4465
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,1536,1,0,0.6438
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,2048,1,0,0.8605
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,3072,1,0,1.3536
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,4096,1,0,1.8830
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,6144,1,0,3.0657
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,8192,1,0,4.4329
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,10240,1,0,5.9426
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,12288,1,0,7.6089
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,16,1,0,0.1254
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,16384,1,0,11.5978
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,32,1,0,0.1322
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,64,1,0,0.1490
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,128,1,0,0.1852
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,256,1,0,0.2628
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,512,1,0,0.4260
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,1024,1,0,0.8027
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,1536,1,0,1.2270
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,2,32768,1,0,34.0704
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,2048,1,0,1.6712
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,3072,1,0,2.6164
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,4096,1,0,3.6669
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,6144,1,0,6.0147
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,8192,1,0,8.8986
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,10240,1,0,11.9558
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,16,1,0,0.1381
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,12288,1,0,15.0233
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,32,1,0,0.1527
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,16384,1,0,22.9374
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,64,1,0,0.1913
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,128,1,0,0.2647
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,256,1,0,0.4199
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,512,1,0,0.7727
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,1024,1,0,1.5635
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,1536,1,0,2.3806
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,2048,1,0,3.2495
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,3072,1,0,5.1352
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,4,32768,1,0,67.7153
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,4096,1,0,7.4152
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,6144,1,0,11.8841
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,8192,1,0,17.6563
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,10240,1,0,23.7374
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,16,1,0,0.1615
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,32,1,0,0.1998
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,64,1,0,0.2712
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,12288,1,0,30.4381
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,128,1,0,0.4179
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,256,1,0,0.7569
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,8,16384,1,0,45.5910
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,512,1,0,1.5056
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,1024,1,0,3.0486
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,1536,1,0,4.6822
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,2048,1,0,6.6027
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,3072,1,0,10.1669
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,4096,1,0,14.7198
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,32,1,0,0.2908
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,16,1,0,0.2182
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,6144,1,0,24.2068
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,64,1,0,0.4362
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,128,1,0,0.7577
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,256,1,0,1.4823
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,512,1,0,2.9386
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,16,8192,1,0,35.1100
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,1024,1,0,6.1981
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,1536,1,0,9.2611
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,2048,1,0,13.1253
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,16,1,0,0.3293
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,32,1,0,0.4758
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,3072,1,0,20.7836
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,64,1,0,0.7951
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,128,1,0,1.4884
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,32,4096,1,0,29.2876
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,256,1,0,2.8984
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,512,1,0,5.9868
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,1024,1,0,12.3166
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,128,16,1,0,0.5513
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,128,32,1,0,0.8680
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,1536,1,0,18.9981
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,128,64,1,0,1.5597
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,64,2048,1,0,26.0738
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,128,128,1,0,2.9053
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,128,256,1,0,5.9179
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,128,512,1,0,11.8815
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,256,16,1,0,1.0229
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,256,32,1,0,1.7100
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,256,64,1,0,3.0556
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,128,1024,1,0,24.4660
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,256,128,1,0,5.9302
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,256,256,1,0,11.7552
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,16,1,0,0.1114
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,32,1,0,0.1098
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,64,1,0,0.1135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,128,1,0,0.1179
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,256,1,0,0.1257
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,64,256,512,1,0,23.6579
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,512,1,0,0.1537
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,1024,1,0,0.2083
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,1536,1,0,0.2678
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,2048,1,0,0.3319
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,3072,1,0,0.4660
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,4096,1,0,0.6077
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,6144,1,0,0.9464
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,8192,1,0,1.3469
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,10240,1,0,1.7890
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,12288,1,0,2.2739
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,16384,1,0,3.3556
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,1,32768,1,0,9.3718
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,16,1,0,0.1137
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,32,1,0,0.1136
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,64,1,0,0.1180
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,128,1,0,0.1219
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,256,1,0,0.1501
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,512,1,0,0.2016
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,1024,1,0,0.3105
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,1536,1,0,0.4237
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,2048,1,0,0.5421
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,3072,1,0,0.8123
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,4096,1,0,1.1276
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,6144,1,0,1.8329
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,8192,1,0,2.6195
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,10240,1,0,3.4797
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,12288,1,0,4.3987
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,16384,1,0,6.4816
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,16,1,0,0.1159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,32,1,0,0.1160
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,2,32768,1,0,18.5563
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,64,1,0,0.1238
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,128,1,0,0.1483
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,256,1,0,0.1982
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,512,1,0,0.2972
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,1024,1,0,0.5061
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,1536,1,0,0.7453
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,2048,1,0,1.0081
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,3072,1,0,1.5919
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,4096,1,0,2.2221
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,6144,1,0,3.5733
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,8192,1,0,5.0942
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,10240,1,0,6.7697
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,12288,1,0,8.6084
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,16384,1,0,12.9312
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,16,1,0,0.1217
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,32,1,0,0.1261
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,64,1,0,0.1500
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,128,1,0,0.1989
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,256,1,0,0.2918
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,4,32768,1,0,36.6784
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,512,1,0,0.4868
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,1024,1,0,0.9475
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,1536,1,0,1.4684
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,2048,1,0,2.0061
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,3072,1,0,3.1235
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,4096,1,0,4.3288
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,6144,1,0,7.0127
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,8192,1,0,10.2300
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,10240,1,0,13.6152
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,16,1,0,0.1305
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,32,1,0,0.1552
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,12288,1,0,16.9713
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,64,1,0,0.2048
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,8,16384,1,0,25.5608
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,128,1,0,0.2940
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,256,1,0,0.4789
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,512,1,0,0.9219
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,1024,1,0,1.8942
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,1536,1,0,2.8839
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,2048,1,0,3.9175
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,3072,1,0,6.1393
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,4096,1,0,8.7542
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,16,1,0,0.1650
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,32,1,0,0.2121
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,6144,1,0,13.8505
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,64,1,0,0.3021
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,16,8192,1,0,20.2455
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,128,1,0,0.4775
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,256,1,0,0.9067
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,512,1,0,1.8385
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,1024,1,0,3.7122
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,1536,1,0,5.6752
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,2048,1,0,7.9428
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,3072,1,0,12.1239
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,16,1,0,0.2318
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,32,1,0,0.3211
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,64,1,0,0.4966
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,32,4096,1,0,17.3115
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,128,1,0,0.9077
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,256,1,0,1.8172
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,512,1,0,3.6003
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,1024,1,0,7.5280
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,128,16,1,0,0.3574
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,1536,1,0,11.2372
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,128,32,1,0,0.5336
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,64,2048,1,0,15.7309
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,128,64,1,0,0.9418
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,128,128,1,0,1.8217
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,128,256,1,0,3.5637
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,128,512,1,0,7.3170
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,256,16,1,0,0.6109
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,256,32,1,0,1.0193
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,128,1024,1,0,14.9067
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,256,64,1,0,1.8972
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,256,128,1,0,3.5695
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,256,256,1,0,7.2543
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,16,1,0,0.1074
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,32,1,0,0.1106
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,64,1,0,0.1099
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,256,1,0,0.1199
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,128,1,0,0.1118
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,32,256,512,1,0,14.4778
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,512,1,0,0.1320
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,1024,1,0,0.1763
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,1536,1,0,0.2136
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,2048,1,0,0.2506
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,3072,1,0,0.3426
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,4096,1,0,0.4388
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,6144,1,0,0.6393
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,8192,1,0,0.8769
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,10240,1,0,1.1405
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,16384,1,0,2.0879
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,12288,1,0,1.4319
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,1,32768,1,0,5.5330
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,16,1,0,0.1097
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,32,1,0,0.1095
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,64,1,0,0.1135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,128,1,0,0.1176
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,256,1,0,0.1305
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,512,1,0,0.1690
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,1024,1,0,0.2380
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,1536,1,0,0.3169
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,2048,1,0,0.3945
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,3072,1,0,0.5584
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,4096,1,0,0.7517
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,6144,1,0,1.1852
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,8192,1,0,1.6829
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,10240,1,0,2.2206
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,12288,1,0,2.7805
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,16384,1,0,4.0137
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,16,1,0,0.1115
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,2,32768,1,0,10.7054
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,32,1,0,0.1135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,64,1,0,0.1156
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,128,1,0,0.1279
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,256,1,0,0.1648
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,512,1,0,0.2302
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,1024,1,0,0.3709
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,1536,1,0,0.5191
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,2048,1,0,0.6836
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,3072,1,0,1.0513
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,4096,1,0,1.4624
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,6144,1,0,2.3463
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,8192,1,0,3.2828
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,10240,1,0,4.3060
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,12288,1,0,5.3925
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,16384,1,0,7.8207
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,16,1,0,0.1150
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,32,1,0,0.1176
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,64,1,0,0.1279
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,4,32768,1,0,21.1749
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,256,1,0,0.2280
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,128,1,0,0.1629
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,512,1,0,0.3606
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,1024,1,0,0.6466
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,1536,1,0,0.9816
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,2048,1,0,1.3479
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,3072,1,0,2.1045
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,4096,1,0,2.8863
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,6144,1,0,4.5735
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,8192,1,0,6.4353
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,10240,1,0,8.4271
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,16,1,0,0.1197
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,12288,1,0,10.5694
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,32,1,0,0.1303
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,64,1,0,0.1664
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,8,16384,1,0,15.5440
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,128,1,0,0.2315
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,256,1,0,0.3525
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,512,1,0,0.6288
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,1024,1,0,1.2858
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,1536,1,0,1.9804
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,2048,1,0,2.6730
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,3072,1,0,4.1234
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,4096,1,0,5.6812
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,6144,1,0,8.9956
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,16,1,0,0.1360
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,32,1,0,0.1715
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,16,8192,1,0,12.8361
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,64,1,0,0.2345
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,128,1,0,0.3567
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,256,1,0,0.6201
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,512,1,0,1.2554
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,1024,1,0,2.5634
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,1536,1,0,3.8900
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,2048,1,0,5.2611
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,3072,1,0,8.1184
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,32,4096,1,0,11.3624
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,16,1,0,0.1805
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,32,1,0,0.2423
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,64,1,0,0.3632
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,128,1,0,0.6195
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,256,1,0,1.2432
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,512,1,0,2.5071
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,1024,1,0,5.0583
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,1536,1,0,7.6621
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,128,16,1,0,0.2612
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,64,2048,1,0,10.5687
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,128,32,1,0,0.3827
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,128,64,1,0,0.6373
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,128,128,1,0,1.2427
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,128,256,1,0,2.4868
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,128,512,1,0,4.9466
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,256,16,1,0,0.4215
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,128,1024,1,0,10.1525
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,256,32,1,0,0.6763
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,256,64,1,0,1.2812
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,256,128,1,0,2.4885
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,256,256,1,0,4.9074
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,16,1,0,0.1094
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,16,256,512,1,0,9.9413
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,32,1,0,0.1056
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,64,1,0,0.1095
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,128,1,0,0.1133
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,256,1,0,0.1197
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,512,1,0,0.1279
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,1024,1,0,0.1583
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,1536,1,0,0.1905
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,2048,1,0,0.2219
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,3072,1,0,0.2844
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,4096,1,0,0.3409
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,6144,1,0,0.4861
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,8192,1,0,0.6558
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,10240,1,0,0.8397
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,12288,1,0,1.0299
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,16384,1,0,1.4457
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,1,32768,1,0,3.5683
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,16,1,0,0.1092
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,32,1,0,0.1094
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,64,1,0,0.1136
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,128,1,0,0.1156
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,256,1,0,0.1259
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,512,1,0,0.1524
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,1024,1,0,0.2085
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,1536,1,0,0.2648
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,2048,1,0,0.3158
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,3072,1,0,0.4375
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,4096,1,0,0.5769
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,6144,1,0,0.8786
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,8192,1,0,1.2002
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,10240,1,0,1.5604
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,16384,1,0,2.7473
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,12288,1,0,1.9295
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,2,32768,1,0,6.8560
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,16,1,0,0.1114
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,32,1,0,0.1135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,64,1,0,0.1157
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,128,1,0,0.1219
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,256,1,0,0.1507
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,512,1,0,0.2027
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,1024,1,0,0.3028
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,1536,1,0,0.4129
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,2048,1,0,0.5338
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,3072,1,0,0.7972
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,4096,1,0,1.0778
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,6144,1,0,1.6878
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,8192,1,0,2.3359
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,10240,1,0,3.0391
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,12288,1,0,3.7699
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,16384,1,0,5.3371
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,16,1,0,0.1126
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,32,1,0,0.1153
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,4,32768,1,0,13.2924
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,64,1,0,0.1205
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,128,1,0,0.1462
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,256,1,0,0.1994
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,512,1,0,0.2960
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,1024,1,0,0.5117
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,1536,1,0,0.7581
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,2048,1,0,1.0126
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,3072,1,0,1.5506
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,4096,1,0,2.1202
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,6144,1,0,3.3343
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,8192,1,0,4.6064
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,10240,1,0,5.9394
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,12288,1,0,7.3379
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,16,1,0,0.1155
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,32,1,0,0.1217
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,8,16384,1,0,10.3944
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,64,1,0,0.1462
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,128,1,0,0.1974
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,512,1,0,0.5023
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,256,1,0,0.2936
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,1024,1,0,0.9760
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,1536,1,0,1.4810
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,2048,1,0,2.0048
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,3072,1,0,3.0963
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,4096,1,0,4.2048
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,6144,1,0,6.5248
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,16,1,0,0.1261
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,32,1,0,0.1485
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,16,8192,1,0,9.0056
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,64,1,0,0.1997
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,128,1,0,0.2957
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,256,1,0,0.4938
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,1024,1,0,1.9467
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,512,1,0,0.9573
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,1536,1,0,2.9681
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,2048,1,0,3.9955
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,3072,1,0,6.0711
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,32,4096,1,0,8.2526
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,16,1,0,0.1544
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,32,1,0,0.2051
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,64,1,0,0.3002
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,128,1,0,0.4974
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,256,1,0,0.9509
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,512,1,0,1.9196
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,1024,1,0,3.8842
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,1536,1,0,5.8417
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,64,2048,1,0,7.8298
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,128,16,1,0,0.2136
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,128,32,1,0,0.3086
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,128,64,1,0,0.5045
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,128,128,1,0,0.9508
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,128,256,1,0,1.9022
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,128,512,1,0,3.8305
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,256,16,1,0,0.3285
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,128,1024,1,0,7.6246
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,256,32,1,0,0.5231
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,256,64,1,0,0.9672
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,256,128,1,0,1.9102
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,256,256,1,0,3.8064
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,16,1,0,0.1033
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,32,1,0,0.1074
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,8,256,512,1,0,7.5251
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,64,1,0,0.1097
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,128,1,0,0.1119
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,512,1,0,0.1263
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,256,1,0,0.1175
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,1024,1,0,0.1528
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,1536,1,0,0.1810
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,2048,1,0,0.2092
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,3072,1,0,0.2640
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,4096,1,0,0.3149
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,6144,1,0,0.4215
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,8192,1,0,0.5361
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,10240,1,0,0.6721
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,12288,1,0,0.8227
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,16384,1,0,1.1464
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,1,32768,1,0,2.5964
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,16,1,0,0.1088
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,32,1,0,0.1096
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,64,1,0,0.1098
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,128,1,0,0.1141
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,256,1,0,0.1239
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,512,1,0,0.1483
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,1024,1,0,0.1969
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,2048,1,0,0.2895
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,1536,1,0,0.2466
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,4096,1,0,0.4840
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,3072,1,0,0.3840
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,6144,1,0,0.7279
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,8192,1,0,0.9940
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,10240,1,0,1.2684
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,12288,1,0,1.5403
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,16384,1,0,2.1159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,16,1,0,0.1095
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,32,1,0,0.1114
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,2,32768,1,0,4.9305
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,64,1,0,0.1143
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,128,1,0,0.1196
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,256,1,0,0.1444
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,512,1,0,0.1892
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,1024,1,0,0.2783
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,1536,1,0,0.3650
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,2048,1,0,0.4609
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,3072,1,0,0.6794
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,4096,1,0,0.9143
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,6144,1,0,1.3912
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,8192,1,0,1.8743
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,10240,1,0,2.3997
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,12288,1,0,2.9457
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,16384,1,0,4.1102
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,16,1,0,0.1115
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,4,32768,1,0,9.5224
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,32,1,0,0.1134
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,64,1,0,0.1207
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,256,1,0,0.1861
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,128,1,0,0.1416
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,512,1,0,0.2694
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,1024,1,0,0.4475
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,1536,1,0,0.6566
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,2048,1,0,0.8712
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,3072,1,0,1.3093
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,4096,1,0,1.7489
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,6144,1,0,2.7039
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,8192,1,0,3.6999
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,10240,1,0,4.7245
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,12288,1,0,5.7819
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,16,1,0,0.1136
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,8,16384,1,0,8.0063
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,32,1,0,0.1211
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,64,1,0,0.1409
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,128,1,0,0.1840
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,256,1,0,0.2670
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,512,1,0,0.4410
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,1024,1,0,0.8486
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,1536,1,0,1.2728
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,2048,1,0,1.6824
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,3072,1,0,2.5755
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,4096,1,0,3.4870
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,6144,1,0,5.3512
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,16,1,0,0.1216
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,16,8192,1,0,7.2537
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,32,1,0,0.1403
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,64,1,0,0.1853
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,128,1,0,0.2649
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,256,1,0,0.4400
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,512,1,0,0.8375
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,1024,1,0,1.6473
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,1536,1,0,2.5063
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,2048,1,0,3.3665
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,3072,1,0,5.1075
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,32,4096,1,0,6.8669
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,16,1,0,0.1442
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,32,1,0,0.1866
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,64,1,0,0.2682
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,128,1,0,0.4410
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,256,1,0,0.8313
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,512,1,0,1.6338
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,1024,1,0,3.3121
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,1536,1,0,4.9862
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,64,2048,1,0,6.6494
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,128,16,1,0,0.1920
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,128,32,1,0,0.2736
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,128,64,1,0,0.4444
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,128,128,1,0,0.8334
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,128,256,1,0,1.6193
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,128,512,1,0,3.2834
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,256,16,1,0,0.2819
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,128,1024,1,0,6.5424
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,256,32,1,0,0.4548
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,256,64,1,0,0.8376
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,256,128,1,0,1.6239
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,256,256,1,0,3.2684
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,16,1,0,0.1054
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,64,1,0,0.1094
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,32,1,0,0.1055
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,4,256,512,1,0,6.4942
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,128,1,0,0.1093
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,256,1,0,0.1159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,512,1,0,0.1262
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,1024,1,0,0.1532
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,1536,1,0,0.1794
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,2048,1,0,0.2046
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,3072,1,0,0.2577
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,4096,1,0,0.3041
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,6144,1,0,0.4037
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,8192,1,0,0.5125
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,10240,1,0,0.6287
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,12288,1,0,0.7409
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,16384,1,0,0.9741
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,16,1,0,0.1077
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,32,1,0,0.1056
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,1,32768,1,0,2.1229
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,64,1,0,0.1095
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,128,1,0,0.1159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,256,1,0,0.1236
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,512,1,0,0.1448
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,2048,1,0,0.2793
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,3072,1,0,0.3680
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,1024,1,0,0.1920
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,1536,1,0,0.2369
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,4096,1,0,0.4592
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,6144,1,0,0.6649
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,8192,1,0,0.8714
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,10240,1,0,1.0939
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,12288,1,0,1.3313
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,16384,1,0,1.8225
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,2,32768,1,0,3.9555
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,16,1,0,0.1054
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,32,1,0,0.1119
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,64,1,0,0.1145
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,128,1,0,0.1196
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,256,1,0,0.1442
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,512,1,0,0.1851
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,1024,1,0,0.2650
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,1536,1,0,0.3480
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,2048,1,0,0.4353
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,3072,1,0,0.6273
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,4096,1,0,0.8201
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,6144,1,0,1.2377
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,8192,1,0,1.6631
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,10240,1,0,2.1067
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,12288,1,0,2.5500
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,16384,1,0,3.4686
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,4,32768,1,0,7.6003
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,16,1,0,0.1115
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,32,1,0,0.1136
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,64,1,0,0.1196
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,128,1,0,0.1403
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,256,1,0,0.1822
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,512,1,0,0.2595
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,1024,1,0,0.4233
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,1536,1,0,0.6062
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,2048,1,0,0.7943
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,3072,1,0,1.1872
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,4096,1,0,1.5865
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,6144,1,0,2.3986
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,8192,1,0,3.2256
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,10240,1,0,4.0731
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,12288,1,0,4.9520
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,16,1,0,0.1132
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,8,16384,1,0,6.7687
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,32,1,0,0.1196
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,64,1,0,0.1401
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,128,1,0,0.1787
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,256,1,0,0.2563
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,512,1,0,0.4172
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,1024,1,0,0.7790
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,1536,1,0,1.1657
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,2048,1,0,1.5437
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,3072,1,0,2.3135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,4096,1,0,3.1034
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,6144,1,0,4.6980
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,16,1,0,0.1197
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,16,8192,1,0,6.3496
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,32,1,0,0.1385
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,64,1,0,0.1784
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,128,1,0,0.2543
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,256,1,0,0.4134
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,512,1,0,0.7716
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,1024,1,0,1.5219
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,1536,1,0,2.2743
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,2048,1,0,3.0308
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,3072,1,0,4.5751
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,32,4096,1,0,6.1373
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,16,1,0,0.1402
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,32,1,0,0.1790
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,64,1,0,0.2520
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,128,1,0,0.4116
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,256,1,0,0.7705
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,512,1,0,1.5104
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,1024,1,0,2.9981
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,1536,1,0,4.5031
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,64,2048,1,0,6.0281
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,128,16,1,0,0.1821
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,128,32,1,0,0.2556
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,128,64,1,0,0.4147
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,128,128,1,0,0.7738
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,128,256,1,0,1.5021
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,128,512,1,0,2.9808
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,128,1024,1,0,5.9596
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,256,16,1,0,0.2607
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,256,32,1,0,0.4181
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,256,64,1,0,0.7785
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,256,128,1,0,1.5092
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,256,256,1,0,2.9760
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,16,1,0,0.0980
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,2,256,512,1,0,5.9337
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,32,1,0,0.1055
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,64,1,0,0.1053
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,128,1,0,0.1100
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,256,1,0,0.1160
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,512,1,0,0.1239
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,1024,1,0,0.1487
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,1536,1,0,0.1749
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,2048,1,0,0.2017
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,3072,1,0,0.2541
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,4096,1,0,0.2992
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,6144,1,0,0.3967
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,8192,1,0,0.5053
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,10240,1,0,0.6135
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,12288,1,0,0.7232
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,16384,1,0,0.9496
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,1,32768,1,0,1.8561
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,16,1,0,0.1013
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,32,1,0,0.1055
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,64,1,0,0.1098
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,128,1,0,0.1117
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,256,1,0,0.1200
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,512,1,0,0.1422
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,1024,1,0,0.1886
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,1536,1,0,0.2334
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,2048,1,0,0.2738
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,3072,1,0,0.3602
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,4096,1,0,0.4535
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,6144,1,0,0.6466
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,8192,1,0,0.8488
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,12288,1,0,1.2500
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,16384,1,0,1.6516
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,10240,1,0,1.0539
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,2,32768,1,0,3.4742
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,16,1,0,0.1063
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,32,1,0,0.1057
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,64,1,0,0.1109
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,128,1,0,0.1176
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,256,1,0,0.1399
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,512,1,0,0.1813
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,1024,1,0,0.2610
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,1536,1,0,0.3393
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,2048,1,0,0.4284
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,3072,1,0,0.6072
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,4096,1,0,0.7972
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,6144,1,0,1.1743
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,8192,1,0,1.5491
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,10240,1,0,1.9406
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,12288,1,0,2.3506
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,16384,1,0,3.1757
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,16,1,0,0.1056
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,4,32768,1,0,6.6179
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,32,1,0,0.1114
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,64,1,0,0.1169
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,128,1,0,0.1364
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,256,1,0,0.1795
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,512,1,0,0.2535
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,1024,1,0,0.4116
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,1536,1,0,0.5876
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,2048,1,0,0.7711
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,3072,1,0,1.1356
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,4096,1,0,1.4972
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,6144,1,0,2.2601
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,8192,1,0,3.0211
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,10240,1,0,3.7889
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,12288,1,0,4.5594
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,8,16384,1,0,6.1357
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,16,1,0,0.1115
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,32,1,0,0.1187
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,64,1,0,0.1361
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,128,1,0,0.1770
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,256,1,0,0.2508
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,512,1,0,0.4066
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,1024,1,0,0.7583
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,2048,1,0,1.4754
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,1536,1,0,1.1159
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,3072,1,0,2.2131
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,4096,1,0,2.9428
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,6144,1,0,4.4092
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,16,8192,1,0,5.8867
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,16,1,0,0.1175
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,32,1,0,0.1377
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,64,1,0,0.1753
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,128,1,0,0.2494
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,256,1,0,0.4043
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,512,1,0,0.7518
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,1024,1,0,1.4561
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,1536,1,0,2.1836
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,2048,1,0,2.9014
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,3072,1,0,4.3345
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,32,4096,1,0,5.7656
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,16,1,0,0.1364
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,64,1,0,0.2487
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,32,1,0,0.1752
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,128,1,0,0.4006
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,256,1,0,0.7482
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,512,1,0,1.4528
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,1024,1,0,2.8791
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,1536,1,0,4.2968
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,128,16,1,0,0.1753
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,64,2048,1,0,5.7024
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,128,32,1,0,0.2490
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,128,64,1,0,0.4028
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,128,128,1,0,0.7474
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,128,256,1,0,1.4466
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,128,512,1,0,2.8723
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,256,16,1,0,0.2509
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,256,64,1,0,0.7489
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,256,32,1,0,0.4041
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,128,1024,1,0,5.6712
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,256,128,1,0,1.4495
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,256,256,1,0,2.8627
VLLM,0.16.0,NVIDIA B200,mla_context_module,default,deepseek-ai/DeepSeek-V3,DeepseekV3ForCausalLM,fp8,fp8,nvfp4,1,256,512,1,0,5.6492
