framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,16,1,0,0.2483
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,32,1,0,0.2654
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,64,1,0,0.2957
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,256,1,0,0.5097
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,128,1,0,0.3734
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1024,1,0,1.4399
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,512,1,0,0.8116
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1536,1,0,2.0856
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,2048,1,0,2.7487
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,3072,1,0,4.0738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,4096,1,0,5.4218
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,6144,1,0,8.0622
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,8192,1,0,10.8269
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,10240,1,0,13.5551
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,16,1,0,0.2678
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,32,1,0,0.3002
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,12288,1,0,16.6084
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,64,1,0,0.3704
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,16384,1,0,22.3826
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,128,1,0,0.5135
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,256,1,0,0.8102
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,0,0.2200
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,512,1,0,1.4358
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1024,1,0,2.7276
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1536,1,0,4.0164
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,2048,1,0,5.3009
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,32768,1,0,40.0504
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,3072,1,0,7.9090
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,4096,1,0,10.5429
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,6144,1,0,16.0663
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,0,0.2253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,8192,1,0,21.4794
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,16,1,0,0.2978
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,32,1,0,0.3694
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,64,1,0,0.5121
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,10240,1,0,27.0103
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,128,1,0,0.8096
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,256,1,0,1.4363
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,12288,1,0,32.6810
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,0,0.2181
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,512,1,0,2.7166
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1024,1,0,5.2589
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,16384,1,0,36.7142
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1536,1,0,7.7599
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,2048,1,0,10.3138
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,3072,1,0,15.6988
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,4096,1,0,20.9601
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,0,0.2388
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,6144,1,0,31.6435
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,16,1,0,0.3726
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,8192,1,0,34.9536
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,32,1,0,0.5088
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,32768,1,0,79.7458
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,64,1,0,0.8097
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,128,1,0,1.4341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,256,1,0,2.7189
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,10240,1,0,44.6599
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,512,1,0,5.2601
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,12288,1,0,54.2501
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1024,1,0,10.2455
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1536,1,0,15.4307
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,2048,1,0,20.4921
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,16384,1,0,73.1620
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,3072,1,0,30.9708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,4096,1,0,33.8932
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,0,0.2519
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,16,1,0,0.5129
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,32,1,0,0.8097
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,64,1,0,1.4371
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,128,1,0,2.7181
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,6144,1,0,52.2150
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,256,1,0,5.2759
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,512,1,0,10.1982
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,8192,1,0,69.5553
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1024,1,0,20.4050
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1536,1,0,30.4531
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,10240,1,0,88.0842
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,2048,1,0,32.9765
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,32768,1,0,156.3464
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,0,0.2661
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,16,1,0,0.8113
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,12288,1,0,104.7541
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,32,1,0,1.4361
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,64,1,0,2.7079
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,3072,1,0,50.7664
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,128,1,0,5.2409
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,256,1,0,10.2069
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,512,1,0,20.2682
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,4096,1,0,67.4995
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,16384,1,0,143.1859
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1024,1,0,32.7289
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,16,1,0,1.4381
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1536,1,0,49.7585
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,32,1,0,2.7087
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,64,1,0,5.2396
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,6144,1,0,100.6478
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,128,1,0,10.1861
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,2048,1,0,65.6503
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,256,1,0,20.2686
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,8192,1,0,136.0020
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,0,0.2936
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,512,1,0,32.7030
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,16,1,0,2.7131
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,3072,1,0,97.8939
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,32,1,0,5.2448
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,64,1,0,10.1730
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1024,1,0,65.1555
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,128,1,0,20.2867
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,4096,1,0,131.8967
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,256,1,0,32.6196
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,16,1,0,5.2459
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,0,0.3677
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1536,1,0,95.8582
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,32,1,0,10.1846
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,64,1,0,20.2682
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,512,1,0,64.9133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,0,0.1846
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,16,1,0,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,32,1,0,0.2319
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,64,1,0,0.2549
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,128,1,0,0.3140
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,128,1,0,32.5824
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,0,0.5115
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,256,1,0,0.4250
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,512,1,0,0.6473
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,2048,1,0,128.1773
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1024,1,0,1.1590
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1536,1,0,1.6502
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,2048,1,0,2.1689
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,3072,1,0,3.1482
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,4096,1,0,4.1883
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,6144,1,0,6.3588
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,8192,1,0,8.5457
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,10240,1,0,10.7738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,256,1,0,64.8414
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,0,0.1911
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,12288,1,0,12.9750
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,16,1,0,0.2326
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,64,1,0,0.3161
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,32,1,0,0.2553
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,128,1,0,0.4266
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,256,1,0,0.6480
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,16384,1,0,17.7076
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,512,1,0,1.1533
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1024,1,0,2.1547
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1536,1,0,3.0837
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,2048,1,0,4.0556
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1024,1,0,127.1232
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,3072,1,0,6.1738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,4096,1,0,8.2716
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,32768,1,0,38.3908
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,6144,1,0,12.4584
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,0,0.2059
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,16,1,0,0.2529
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,8192,1,0,16.7982
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,10240,1,0,21.1687
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,32,1,0,0.3181
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,128,1,0,0.6501
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,64,1,0,0.4244
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,256,1,0,1.1540
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,12288,1,0,25.7073
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,512,1,0,2.1432
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1024,1,0,4.0221
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1536,1,0,6.0341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,16384,1,0,35.0013
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,512,1,0,126.9898
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,2048,1,0,8.0655
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,3072,1,0,12.1127
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,4096,1,0,16.2476
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,0,0.2112
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,6144,1,0,24.6484
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,16,1,0,0.3140
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,32,1,0,0.4263
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,64,1,0,0.6488
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,32768,1,0,69.0379
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,8192,1,0,33.2462
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,128,1,0,1.1505
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,256,1,0,2.1416
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,512,1,0,4.0018
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,10240,1,0,42.0238
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1024,1,0,7.9774
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1536,1,0,11.8539
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,12288,1,0,51.0722
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,2048,1,0,15.7858
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,16384,1,0,62.4198
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,3072,1,0,23.9249
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,0,0.2214
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,4096,1,0,32.1981
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,16,1,0,0.4261
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,32,1,0,0.6520
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,64,1,0,1.1537
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,128,1,0,2.1380
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,256,1,0,3.9978
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,6144,1,0,48.9332
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,512,1,0,7.9391
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,8192,1,0,58.9124
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1024,1,0,15.6496
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1536,1,0,23.4121
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,10240,1,0,74.8230
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,32768,1,0,138.0688
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,2048,1,0,31.2704
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,12288,1,0,91.0623
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,0,0.2322
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,16,1,0,0.6527
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,32,1,0,1.1555
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,64,1,0,2.1452
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,128,1,0,3.9897
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,3072,1,0,47.5617
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,256,1,0,7.9473
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,512,1,0,15.5928
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,4096,1,0,56.7757
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,16384,1,0,124.6708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1024,1,0,31.0111
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,0,0.2562
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,16,1,0,1.1541
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,32,1,0,2.1405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,64,1,0,3.9897
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,6144,1,0,86.8645
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1536,1,0,46.5597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,128,1,0,7.9241
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,2048,1,0,54.9987
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,256,1,0,15.5724
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,0,0.3151
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,16,1,0,2.1378
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,8192,1,0,117.7345
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,512,1,0,30.9043
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,32,1,0,3.9887
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,3072,1,0,84.1993
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,64,1,0,7.9331
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,128,1,0,15.5318
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1024,1,0,54.5357
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,0,0.4264
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,256,1,0,30.9154
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,16,1,0,3.9983
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,4096,1,0,113.4692
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,32,1,0,7.9062
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1536,1,0,82.1052
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,64,1,0,15.5325
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,0,0.1767
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,16,1,0,0.2098
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,32,1,0,0.2143
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,512,1,0,54.3590
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,64,1,0,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,128,1,0,0.2815
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,128,1,0,30.8174
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,256,1,0,0.3716
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,512,1,0,0.5406
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1024,1,0,0.9133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,2048,1,0,1.7050
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1536,1,0,1.3136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,2048,1,0,109.8516
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,3072,1,0,2.5002
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,4096,1,0,3.3002
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,6144,1,0,5.0169
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,8192,1,0,6.7410
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,10240,1,0,8.5451
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,256,1,0,54.1903
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,12288,1,0,10.3575
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,0,0.1756
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,16,1,0,0.2163
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,32,1,0,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,64,1,0,0.2781
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,16384,1,0,14.1585
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,128,1,0,0.3708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,256,1,0,0.5398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,512,1,0,0.9156
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1024,1,0,1.6843
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1536,1,0,2.4353
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,2048,1,0,3.1757
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,3072,1,0,4.8269
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1024,1,0,108.9001
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,4096,1,0,6.4799
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,32768,1,0,31.3927
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,6144,1,0,9.8180
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,8192,1,0,13.2785
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,0,0.1884
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,16,1,0,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,10240,1,0,16.7861
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,32,1,0,0.2777
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,64,1,0,0.3700
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,128,1,0,0.5403
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,12288,1,0,20.4341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,256,1,0,0.9107
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,512,1,0,1.6788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1024,1,0,3.1408
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,512,1,0,108.4056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1536,1,0,4.7071
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,16384,1,0,28.0615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,2048,1,0,6.2437
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,3072,1,0,9.4403
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,4096,1,0,12.7616
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,0,0.1890
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,6144,1,0,19.3808
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,16,1,0,0.2774
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,32,1,0,0.3702
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,64,1,0,0.5402
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,8192,1,0,26.2794
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,128,1,0,0.9115
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,256,1,0,1.6790
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,32768,1,0,62.4354
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,10240,1,0,33.3154
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,512,1,0,3.1321
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1024,1,0,6.1709
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,12288,1,0,40.6722
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1536,1,0,9.1882
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,2048,1,0,12.2682
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,3072,1,0,18.7187
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,16384,1,0,55.7299
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,4096,1,0,25.2147
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,0,0.2000
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,16,1,0,0.3734
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,32,1,0,0.5384
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,64,1,0,0.9121
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,128,1,0,1.6770
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,6144,1,0,38.6127
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,256,1,0,3.1463
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,512,1,0,6.1312
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1024,1,0,12.1466
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,8192,1,0,52.2816
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1536,1,0,18.2102
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,10240,1,0,66.3772
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,2048,1,0,24.2719
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,32768,1,0,117.2195
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,0,0.2101
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,16,1,0,0.5405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,32,1,0,0.9093
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,12288,1,0,80.8762
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,64,1,0,1.6728
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,128,1,0,3.1157
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,3072,1,0,37.2051
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,256,1,0,6.1329
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,512,1,0,12.0769
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,4096,1,0,50.1767
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,16384,1,0,104.0801
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1024,1,0,24.0165
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,0,0.2260
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,16,1,0,0.9143
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,32,1,0,1.6723
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1536,1,0,36.1701
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,64,1,0,3.1262
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,6144,1,0,76.8162
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,128,1,0,6.1053
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,2048,1,0,48.2949
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,256,1,0,12.0981
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,0,0.2763
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,512,1,0,23.9394
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,16,1,0,1.6751
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,8192,1,0,96.9436
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,32,1,0,3.1232
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,64,1,0,6.1190
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,3072,1,0,73.9726
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,128,1,0,12.0247
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1024,1,0,47.8720
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,0,0.3708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,256,1,0,23.9056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,16,1,0,3.1305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,4096,1,0,92.8800
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,32,1,0,6.1063
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1536,1,0,71.9433
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,64,1,0,12.0568
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,512,1,0,47.5773
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,128,1,0,23.8388
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,0,0.1521
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,16,1,0,0.1818
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,32,1,0,0.1875
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,64,1,0,0.2038
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,128,1,0,0.2163
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,2048,1,0,89.2071
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,256,1,0,0.2494
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,512,1,0,0.3350
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1024,1,0,0.5360
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1536,1,0,0.7887
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,2048,1,0,1.0772
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,3072,1,0,1.7095
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,4096,1,0,2.3552
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,256,1,0,47.7055
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,6144,1,0,3.6988
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,8192,1,0,5.1364
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,10240,1,0,6.5529
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,0,0.1498
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,12288,1,0,7.9464
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,16,1,0,0.1938
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,32,1,0,0.2037
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,64,1,0,0.2109
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1024,1,0,88.1274
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,16384,1,0,11.2587
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,128,1,0,0.2489
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,256,1,0,0.3240
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,512,1,0,0.4960
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1024,1,0,0.8970
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1536,1,0,1.4003
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,2048,1,0,1.9893
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,3072,1,0,3.2421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,4096,1,0,4.5339
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,6144,1,0,7.2376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,32768,1,0,25.6512
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,8192,1,0,10.1017
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,0,0.1550
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,16,1,0,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,10240,1,0,12.9779
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,512,1,0,87.6443
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,64,1,0,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,128,1,0,0.3167
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,12288,1,0,16.0205
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,32,1,0,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,256,1,0,0.4714
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,512,1,0,0.8136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1024,1,0,1.6329
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,16384,1,0,21.9798
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1536,1,0,2.6827
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,2048,1,0,3.8072
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,3072,1,0,6.3561
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,4096,1,0,8.9725
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,32768,1,0,31.0577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,6144,1,0,14.5168
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,0,0.1741
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,16,1,0,0.2123
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,32,1,0,0.2467
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,8192,1,0,20.0417
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,64,1,0,0.3142
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,128,1,0,0.4597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,256,1,0,0.7676
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,512,1,0,1.4645
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,10240,1,0,25.6038
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1024,1,0,3.1454
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1536,1,0,5.1123
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,16384,1,0,24.4397
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,12288,1,0,32.0053
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,2048,1,0,7.5257
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,3072,1,0,12.7982
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,0,0.1796
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,4096,1,0,18.0459
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,16,1,0,0.2443
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,32,1,0,0.3154
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,64,1,0,0.4597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,8192,1,0,21.0410
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,128,1,0,0.7434
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,6144,1,0,28.7150
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,256,1,0,1.3736
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,10240,1,0,27.3024
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,512,1,0,2.8100
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1024,1,0,6.2006
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,32768,1,0,61.7950
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,12288,1,0,34.0687
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1536,1,0,10.2920
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,0,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,2048,1,0,14.9497
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,16,1,0,0.3183
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,32,1,0,0.4557
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,64,1,0,0.7446
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,128,1,0,1.3228
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,4096,1,0,19.0678
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,16384,1,0,48.6307
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,256,1,0,2.6376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,3072,1,0,25.3533
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,512,1,0,5.4933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,0,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,16,1,0,0.4601
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1024,1,0,12.3306
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,6144,1,0,30.0897
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,32,1,0,0.7406
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,64,1,0,1.3282
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,2048,1,0,17.5141
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1536,1,0,20.4286
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,128,1,0,2.5263
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,256,1,0,5.1112
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,0,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,3072,1,0,27.6029
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,512,1,0,10.9249
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,16,1,0,0.7464
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,32,1,0,1.3241
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,64,1,0,2.5265
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1024,1,0,17.2923
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,128,1,0,4.9065
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,4096,1,0,37.9800
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,0,0.2419
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,16,1,0,1.3320
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,256,1,0,10.0707
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,32,1,0,2.5300
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,64,1,0,4.9149
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1536,1,0,26.0633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,16,1,0,0.1727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,0,0.1306
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,512,1,0,17.1855
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,32,1,0,0.1729
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,64,1,0,0.1878
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,128,1,0,9.6649
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,256,1,0,0.2422
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,512,1,0,0.3103
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,128,1,0,0.1983
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1024,1,0,0.4952
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1536,1,0,0.7367
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,2048,1,0,1.0134
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,3072,1,0,1.6557
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,256,1,0,17.1449
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,4096,1,0,2.2692
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,6144,1,0,3.6087
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,8192,1,0,4.9824
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,0,0.1447
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,10240,1,0,6.4886
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,16,1,0,0.1748
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,12288,1,0,7.8667
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,64,1,0,0.2046
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,128,1,0,0.2324
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,32,1,0,0.1868
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,16384,1,0,11.0218
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,256,1,0,0.3007
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,512,1,0,0.4484
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1024,1,0,0.8243
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1536,1,0,1.2798
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,2048,1,0,1.8477
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,3072,1,0,3.1479
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,4096,1,0,4.4602
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,6144,1,0,7.1396
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,32768,1,0,25.5804
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,0,0.1439
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,8192,1,0,9.8396
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,16,1,0,0.1882
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,10240,1,0,12.6731
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,32,1,0,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,64,1,0,0.2321
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,128,1,0,0.2920
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,256,1,0,0.4251
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,12288,1,0,15.5369
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,512,1,0,0.7229
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1024,1,0,1.4820
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1536,1,0,2.4136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,2048,1,0,3.5730
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,16384,1,0,21.8359
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,3072,1,0,6.1978
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,4096,1,0,8.8357
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,32768,1,0,28.2178
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,6144,1,0,14.2151
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,0,0.1527
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,16,1,0,0.1972
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,32,1,0,0.2317
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,8192,1,0,19.5401
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,64,1,0,0.2908
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,128,1,0,0.4082
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,256,1,0,0.6755
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,512,1,0,1.2779
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,10240,1,0,25.2867
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,16384,1,0,21.5980
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1024,1,0,2.8070
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1536,1,0,4.7432
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,2048,1,0,7.1120
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,12288,1,0,31.2972
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,3072,1,0,12.2799
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,0,0.1636
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,16,1,0,0.2356
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,4096,1,0,17.3801
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,32,1,0,0.2910
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,8192,1,0,18.1635
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,64,1,0,0.4097
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,128,1,0,0.6519
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,256,1,0,1.1790
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,512,1,0,2.4415
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,10240,1,0,23.7772
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,6144,1,0,28.3178
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1024,1,0,5.4913
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,32768,1,0,56.1477
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1536,1,0,9.4480
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,12288,1,0,29.8378
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,0,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,16,1,0,0.2923
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,32,1,0,0.4093
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,2048,1,0,14.0300
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,64,1,0,0.6523
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,128,1,0,1.1446
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,256,1,0,2.2476
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,512,1,0,4.7675
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,4096,1,0,16.2278
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,3072,1,0,24.4168
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1024,1,0,10.9358
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,6144,1,0,25.8170
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,0,0.1865
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,16,1,0,0.4106
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,32,1,0,0.6527
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,2048,1,0,14.6671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,64,1,0,1.1421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1536,1,0,18.9780
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,128,1,0,2.1507
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,256,1,0,4.5022
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,0,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,16,1,0,0.6586
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,512,1,0,9.4858
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,32,1,0,1.1342
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,64,1,0,2.1591
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,128,1,0,4.1727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1024,1,0,14.4366
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,0,0.2302
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,16,1,0,1.1405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,256,1,0,8.5260
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,32,1,0,2.1582
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,4096,1,0,32.2591
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,64,1,0,4.1813
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,0,0.1195
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,512,1,0,14.3342
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,16,1,0,0.1605
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,32,1,0,0.1628
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,128,1,0,8.1633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,64,1,0,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,128,1,0,0.1950
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,512,1,0,0.2963
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1024,1,0,0.4783
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,256,1,0,0.2301
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1536,1,0,0.6981
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,2048,1,0,0.9663
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,3072,1,0,1.7498
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,256,1,0,14.2873
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,4096,1,0,2.1899
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,6144,1,0,3.4719
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,8192,1,0,4.8986
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,0,0.1398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,16,1,0,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,10240,1,0,6.2701
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,32,1,0,0.1799
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,12288,1,0,7.7074
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,64,1,0,0.1968
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,128,1,0,0.2268
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,256,1,0,0.2894
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,16384,1,0,10.7794
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,512,1,0,0.4222
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1024,1,0,0.7769
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1536,1,0,1.2297
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,2048,1,0,1.7749
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,3072,1,0,3.0116
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,4096,1,0,4.2264
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,6144,1,0,6.8051
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,32768,1,0,24.8481
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,8192,1,0,9.6312
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,0,0.1429
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,16,1,0,0.1765
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,10240,1,0,12.3468
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,32,1,0,0.1957
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,64,1,0,0.2284
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,12288,1,0,15.2515
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,128,1,0,0.2793
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,256,1,0,0.4024
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,512,1,0,1.1784
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1024,1,0,1.3977
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1536,1,0,2.3155
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,16384,1,0,21.3086
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,2048,1,0,3.3882
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,3072,1,0,6.0021
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,32768,1,0,26.7820
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,4096,1,0,8.3146
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,0,0.1535
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,6144,1,0,13.6484
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,16,1,0,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,32,1,0,0.2249
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,64,1,0,0.2817
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,128,1,0,0.3878
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,8192,1,0,19.0768
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,256,1,0,0.6289
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,512,1,0,1.3548
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1024,1,0,2.6371
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,16384,1,0,20.2070
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,10240,1,0,24.7368
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1536,1,0,4.6163
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,2048,1,0,6.6691
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,12288,1,0,30.4321
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,3072,1,0,11.8006
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,0,0.1625
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,16,1,0,0.2284
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,4096,1,0,16.8283
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,32,1,0,0.2796
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,64,1,0,0.3880
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,128,1,0,0.6092
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,256,1,0,1.0973
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,8192,1,0,16.7705
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,512,1,0,2.4020
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,10240,1,0,22.0310
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1024,1,0,5.1562
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,6144,1,0,27.2078
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1536,1,0,8.9239
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,12288,1,0,27.7071
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,0,0.1731
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,16,1,0,0.2818
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,32,1,0,0.3874
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,2048,1,0,13.4636
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,64,1,0,0.6084
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,128,1,0,1.0513
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,4096,1,0,14.8232
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,256,1,0,2.0714
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,512,1,0,4.4149
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,3072,1,0,23.5433
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1024,1,0,10.3487
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,0,0.1844
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,16,1,0,0.3895
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,32,1,0,0.6085
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,64,1,0,1.0497
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,2048,1,0,13.2739
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,128,1,0,1.9808
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1536,1,0,17.8678
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,256,1,0,4.0056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,0,0.1902
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,512,1,0,8.8428
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,3072,1,0,21.2564
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,16,1,0,0.6123
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,32,1,0,1.0518
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,64,1,0,1.9770
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1024,1,0,13.0264
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,128,1,0,3.8307
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,0,0.2224
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,16,1,0,1.0581
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,256,1,0,7.8166
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,32,1,0,1.9797
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1536,1,0,19.6663
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,64,1,0,3.8246
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,512,1,0,12.9080
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,0,0.1195
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,128,1,0,7.4611
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,16,1,0,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,32,1,0,0.1705
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,64,1,0,0.1808
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,128,1,0,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,256,1,0,0.2245
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,512,1,0,0.2919
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,256,1,0,12.8858
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1024,1,0,0.4639
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1536,1,0,0.6826
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,2048,1,0,0.9460
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,3072,1,0,1.5471
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1024,1,0,25.9797
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,4096,1,0,2.3022
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,6144,1,0,3.4299
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,8192,1,0,4.7253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,10240,1,0,6.0804
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,0,0.1351
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,512,1,0,25.7405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,12288,1,0,7.4906
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,16,1,0,0.1591
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,32,1,0,0.1793
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,64,1,0,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,16384,1,0,10.6402
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,128,1,0,0.2204
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,256,1,0,0.2820
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,512,1,0,0.4172
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1024,1,0,0.7496
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1536,1,0,1.3735
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,2048,1,0,1.7165
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,3072,1,0,2.9725
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,4096,1,0,4.1643
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,6144,1,0,6.6746
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,0,0.1339
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,8192,1,0,9.4178
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,32768,1,0,24.5879
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,16,1,0,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,32,1,0,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,64,1,0,0.2185
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,10240,1,0,12.0798
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,128,1,0,0.2734
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,256,1,0,0.3888
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,12288,1,0,14.8489
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,512,1,0,0.6554
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1024,1,0,1.3476
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1536,1,0,2.2240
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,2048,1,0,3.3087
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,16384,1,0,20.7577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,3072,1,0,5.8750
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,4096,1,0,8.3024
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,32768,1,0,26.1456
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,0,0.1443
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,16,1,0,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,6144,1,0,13.2868
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,32,1,0,0.2217
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,64,1,0,0.2735
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,128,1,0,0.3776
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,256,1,0,1.1675
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,8192,1,0,18.8867
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,512,1,0,1.1458
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1024,1,0,2.5804
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,16384,1,0,19.5045
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1536,1,0,4.3761
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,10240,1,0,24.1257
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,2048,1,0,6.6504
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,12288,1,0,29.7667
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,3072,1,0,11.3802
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,0,0.1606
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,16,1,0,0.2235
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,32,1,0,0.2738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,4096,1,0,16.7146
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,64,1,0,0.3768
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,128,1,0,0.5857
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,256,1,0,1.2284
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,8192,1,0,16.1248
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,512,1,0,2.1779
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,10240,1,0,21.1677
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1024,1,0,5.0048
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,6144,1,0,26.8933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1536,1,0,8.7823
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,0,0.1645
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,16,1,0,0.2757
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,32,1,0,0.3771
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,2048,1,0,12.9429
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,64,1,0,0.5882
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,128,1,0,1.0068
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,4096,1,0,14.1695
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,256,1,0,1.9757
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,512,1,0,4.2730
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,3072,1,0,22.9937
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,0,0.1786
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,16,1,0,0.3795
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1024,1,0,10.1055
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,32,1,0,0.5867
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,64,1,0,1.0077
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,128,1,0,2.0383
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,2048,1,0,12.5572
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,256,1,0,3.8318
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,0,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1536,1,0,17.4037
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,512,1,0,8.4534
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,16,1,0,0.5905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,32,1,0,1.0231
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,64,1,0,1.8897
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1024,1,0,12.3482
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,128,1,0,3.8016
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,0,0.2186
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,16,1,0,1.0132
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1536,1,0,18.6762
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,256,1,0,7.4844
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,32,1,0,1.8928
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,64,1,0,3.8051
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,512,1,0,12.2187
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,0,0.1138
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,16,1,0,0.1552
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,32,1,0,0.1659
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,128,1,0,7.1470
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,64,1,0,0.1727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,128,1,0,0.1862
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,256,1,0,0.2242
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,512,1,0,0.2875
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1024,1,0,0.4560
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1536,1,0,0.6759
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,256,1,0,12.1799
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1024,1,0,24.5731
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,2048,1,0,0.9442
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,3072,1,0,1.5361
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,4096,1,0,2.1477
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,6144,1,0,3.5402
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,8192,1,0,4.6563
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,0,0.1277
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,10240,1,0,6.0611
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,16,1,0,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,12288,1,0,7.5647
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,32,1,0,0.1757
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,64,1,0,0.1848
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,128,1,0,0.2141
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,256,1,0,0.2772
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,16384,1,0,10.5169
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,512,1,0,0.4064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1024,1,0,1.2009
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1536,1,0,1.1726
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,2048,1,0,1.6912
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,3072,1,0,2.9392
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,4096,1,0,4.1554
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,6144,1,0,6.5969
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,0,0.1420
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,32768,1,0,24.4792
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,8192,1,0,9.3901
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,16,1,0,0.1795
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,32,1,0,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,10240,1,0,11.9033
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,12288,1,0,14.8489
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,64,1,0,0.2158
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,128,1,0,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,256,1,0,0.3860
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,512,1,0,0.6472
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1024,1,0,1.3079
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1536,1,0,2.1836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,16384,1,0,20.6381
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,2048,1,0,3.2539
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,3072,1,0,5.7531
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,4096,1,0,8.2369
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,32768,1,0,25.8357
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,0,0.1484
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,16,1,0,0.1830
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,6144,1,0,13.2808
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,32,1,0,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,64,1,0,0.2682
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,8192,1,0,18.3503
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,128,1,0,0.3696
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,256,1,0,0.6015
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,512,1,0,1.2885
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1024,1,0,2.5137
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,16384,1,0,19.2097
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,10240,1,0,23.6580
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1536,1,0,4.2872
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,2048,1,0,6.5028
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,12288,1,0,29.5177
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,3072,1,0,11.4274
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,0,0.1594
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,16,1,0,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,4096,1,0,16.4163
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,8192,1,0,15.7727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,32,1,0,0.2711
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,64,1,0,0.3689
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,128,1,0,0.5760
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,256,1,0,1.1965
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,10240,1,0,20.7349
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,512,1,0,2.1322
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,6144,1,0,26.3995
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1024,1,0,4.8897
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,32768,1,0,51.3413
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,12288,1,0,26.2171
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1536,1,0,8.5461
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,0,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,16,1,0,0.2709
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,2048,1,0,12.9238
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,32,1,0,0.3699
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,64,1,0,0.5761
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,128,1,0,0.9890
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,256,1,0,1.9376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,4096,1,0,13.8000
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,512,1,0,4.1571
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,0,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,3072,1,0,22.7839
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,16,1,0,0.3708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,32,1,0,0.5762
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1024,1,0,9.9342
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,64,1,0,0.9895
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,2048,1,0,12.2521
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,128,1,0,1.8367
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,256,1,0,3.9032
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1536,1,0,17.1677
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,0,0.1830
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,16,1,0,0.5782
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,512,1,0,8.2015
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,32,1,0,0.9912
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,64,1,0,1.9899
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1024,1,0,12.0091
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,4096,1,0,27.4231
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,128,1,0,3.5652
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,0,0.2186
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,256,1,0,7.4688
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1536,1,0,18.1833
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,16,1,0,1.0020
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,32,1,0,1.8535
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,64,1,0,3.5701
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,512,1,0,11.9060
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,1,1,0,0.2214
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,128,1,0,7.1218
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,256,1,0,11.8652
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1024,1,0,23.9867
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,16,1,0,0.2583
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,32,1,0,0.2895
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,64,1,0,0.3433
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,128,1,0,0.3802
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,256,1,0,0.5263
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,512,1,0,1.2438
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,1024,1,0,1.5203
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,1536,1,0,2.1972
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,2048,1,0,2.8975
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,3072,1,0,4.3014
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,4096,1,0,5.8742
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,6144,1,0,8.5037
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,1,1,0,0.2296
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,8192,1,0,11.7857
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,16,1,0,0.2926
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,32,1,0,0.3447
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,10240,1,0,14.4517
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,64,1,0,0.3792
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,12288,1,0,18.0601
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,128,1,0,0.5213
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,512,1,0,1.4961
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,1024,1,0,3.0035
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,1536,1,0,4.1820
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,256,1,0,0.8367
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,16384,1,0,23.7092
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,2048,1,0,5.7230
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,3072,1,0,8.2435
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,4096,1,0,11.3764
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,32768,1,0,42.6169
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,1,1,0,0.2434
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,6144,1,0,16.9380
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,16,1,0,0.3399
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,32,1,0,0.3782
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,64,1,0,1.2005
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,8192,1,0,22.5585
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,128,1,0,0.8344
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,256,1,0,1.4864
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,10240,1,0,29.3092
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,512,1,0,2.9863
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,12288,1,0,34.2008
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,1024,1,0,5.6518
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,16384,1,0,38.7448
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,1536,1,0,8.0451
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,2048,1,0,11.0833
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,3072,1,0,16.8952
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,4096,1,0,21.9124
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,1,1,0,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,16,1,0,0.3794
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,6144,1,0,32.9602
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,8192,1,0,36.6937
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,32,1,0,1.1969
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,32768,1,0,83.6443
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,64,1,0,0.8327
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,128,1,0,1.4873
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,256,1,0,2.9708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,10240,1,0,46.7793
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,512,1,0,5.5683
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,12288,1,0,56.8426
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,1024,1,0,10.9821
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,1536,1,0,16.5685
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,2048,1,0,21.3683
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,16384,1,0,76.4595
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,3072,1,0,33.0672
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,4096,1,0,35.6985
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,1,1,0,0.2747
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,16,1,0,0.5199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,32,1,0,0.8310
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,64,1,0,1.4840
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,128,1,0,2.8065
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,256,1,0,5.5571
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,6144,1,0,54.8290
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,512,1,0,10.9128
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,1024,1,0,21.8204
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,8192,1,0,72.9679
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,1536,1,0,31.6920
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,2048,1,0,34.6697
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,1,1,0,0.2993
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,16,1,0,0.8349
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,10240,1,0,92.3790
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,32,1,0,1.4833
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,64,1,0,2.8013
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,32768,1,0,163.2209
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,128,1,0,5.6219
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,256,1,0,10.9464
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,3072,1,0,54.9091
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,12288,1,0,109.8998
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,1,1,0,0.3592
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,512,1,0,21.1187
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,16,1,0,1.4869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,32,1,0,2.9606
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,1024,1,0,34.3170
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,64,1,0,5.5528
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,4096,1,0,71.0398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,128,1,0,10.9000
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,128,1,1,0,0.4504
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,256,1,0,21.1766
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,1536,1,0,52.2214
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,128,16,1,0,2.8019
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,128,32,1,0,5.6046
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,128,64,1,0,10.6886
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,2048,1,0,68.9851
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,512,1,0,34.1862
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,256,1,1,0,1.1781
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,128,128,1,0,21.1060
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,256,16,1,0,5.6164
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,256,32,1,0,10.6927
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,128,256,1,0,34.1621
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,1024,1,0,68.2356
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,1,1,0,0.1834
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,256,64,1,0,21.0668
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,3072,1,0,103.0656
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,16,1,0,0.2315
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,32,1,0,0.2586
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,64,1,0,0.3076
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,128,1,0,0.3271
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,512,1,0,0.6931
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,256,1,0,0.4764
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,1024,1,0,1.4854
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,1536,1,0,1.8086
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,256,128,1,0,34.1646
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,2048,1,0,2.3866
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,3072,1,0,3.4662
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,4096,1,0,4.7451
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,6144,1,0,7.1393
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,8192,1,0,9.3730
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,128,512,1,0,68.0728
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,1,1,0,0.1994
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,10240,1,0,12.0266
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,16,1,0,0.2589
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,12288,1,0,14.2503
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,32,1,0,0.3079
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,64,1,0,0.3294
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,128,1,0,0.4451
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,256,1,0,1.2301
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,512,1,0,1.2437
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,16384,1,0,19.5163
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,1024,1,0,2.3313
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,1536,1,0,3.3533
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,256,256,1,0,67.9781
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,2048,1,0,4.4252
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,3072,1,0,6.7016
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,4096,1,0,9.1386
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,1,1,0,0.2069
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,6144,1,0,13.5007
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,32768,1,0,41.8193
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,16,1,0,0.3036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,8192,1,0,18.1826
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,32,1,0,0.3243
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,64,1,0,0.4429
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,128,1,0,1.2224
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,10240,1,0,22.9279
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,256,1,0,1.2317
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,512,1,0,2.3013
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,1024,1,0,4.3425
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,12288,1,0,27.9699
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,1536,1,0,6.6660
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,2048,1,0,8.6790
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,16384,1,0,37.8310
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,3072,1,0,13.0536
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,4096,1,0,17.6811
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,1,1,0,0.2041
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,16,1,0,0.3250
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,6144,1,0,26.6991
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,32,1,0,0.4423
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,64,1,0,0.6792
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,128,1,0,1.2281
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,256,1,0,2.2963
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,8192,1,0,35.9618
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,512,1,0,4.7514
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,32768,1,0,74.9271
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,1024,1,0,8.7157
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,10240,1,0,45.4322
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,1536,1,0,12.7503
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,2048,1,0,17.1433
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,12288,1,0,55.0134
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,3072,1,0,26.0799
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,1,1,0,0.2223
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,16,1,0,0.4437
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,32,1,0,0.6784
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,16384,1,0,67.6796
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,64,1,0,1.2263
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,4096,1,0,34.8124
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,128,1,0,2.2925
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,256,1,0,4.3159
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,512,1,0,8.5564
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,6144,1,0,53.7897
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,1024,1,0,16.9880
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,8192,1,0,65.0771
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,1,1,0,0.2366
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,1536,1,0,26.8503
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,16,1,0,0.7519
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,32,1,0,1.2265
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,2048,1,0,33.9197
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,64,1,0,2.4794
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,128,1,0,4.4456
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,256,1,0,8.5360
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,10240,1,0,81.3514
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,3072,1,0,51.6091
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,512,1,0,16.9105
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,1,1,0,0.2684
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,16,1,0,1.2295
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,32,1,0,2.2859
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,4096,1,0,62.2772
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,64,1,0,4.2884
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,1024,1,0,33.6676
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,128,1,0,8.5208
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,256,1,0,16.9045
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,1536,1,0,50.4526
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,1,1,0,0.3165
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,16,1,0,2.5648
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,32,1,0,4.2919
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,2048,1,0,63.3149
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,512,1,0,33.3370
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,64,1,0,8.5231
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,256,1,1,0,0.4440
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,128,1,0,16.8749
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,256,16,1,0,4.3023
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,1024,1,0,59.4814
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,256,32,1,0,8.5082
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,256,1,0,33.4436
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,3072,1,0,95.2542
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,1,1,0,0.1632
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,256,64,1,0,19.1577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,16,1,0,0.2108
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,32,1,0,0.2383
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,64,1,0,0.2750
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,128,1,0,0.2909
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,256,1,0,0.3891
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,512,1,0,0.5708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,1024,1,0,0.9834
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,1536,1,0,1.4202
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,1536,1,0,89.8845
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,2048,1,0,1.8410
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,512,1,0,59.2878
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,4096,1,0,3.5807
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,3072,1,0,2.7086
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,256,128,1,0,33.3317
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,6144,1,0,5.5831
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,8192,1,0,7.2966
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,1,1,0,0.1845
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,10240,1,0,9.3552
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,16,1,0,0.2365
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,12288,1,0,11.1663
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,32,1,0,0.2781
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,64,1,0,0.2869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,128,1,0,0.3843
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,16384,1,0,15.3500
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,256,1,0,1.4063
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,512,1,0,0.9615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,1024,1,0,1.7876
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,1536,1,0,2.5869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,256,256,1,0,59.2036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,2048,1,0,3.3974
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,3072,1,0,5.3122
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,4096,1,0,7.0307
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,32768,1,0,33.7258
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,6144,1,0,10.4341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,1,1,0,0.1909
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,8192,1,0,14.2776
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,16,1,0,0.2780
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,10240,1,0,18.0136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,32,1,0,0.2901
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,64,1,0,0.3823
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,128,1,0,0.5575
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,1024,1,0,119.0352
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,256,1,0,0.9504
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,12288,1,0,21.9053
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,512,1,0,1.7616
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,1024,1,0,3.3288
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,1536,1,0,4.9528
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,16384,1,0,29.9018
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,2048,1,0,6.5924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,3072,1,0,9.9856
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,4096,1,0,13.4431
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,1,1,0,0.1926
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,6144,1,0,20.4903
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,16,1,0,0.2899
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,32,1,0,0.3807
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,64,1,0,0.5552
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,128,1,0,0.9498
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,8192,1,0,27.8929
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,256,1,0,1.7517
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,512,1,0,3.2848
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,1024,1,0,6.4811
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,10240,1,0,35.2557
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,32768,1,0,65.8737
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,1536,1,0,9.6732
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,12288,1,0,42.9701
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,2048,1,0,13.0712
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,3072,1,0,19.9633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,1,1,0,0.2025
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,16384,1,0,58.7817
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,16,1,0,0.3822
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,4096,1,0,26.6755
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,32,1,0,0.5559
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,64,1,0,0.9474
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,128,1,0,1.7433
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,256,1,0,3.2910
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,512,1,0,6.4474
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,6144,1,0,40.9311
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,1024,1,0,12.7674
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,1536,1,0,19.3319
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,8192,1,0,55.2581
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,2048,1,0,25.7954
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,1,1,0,0.2190
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,16,1,0,0.5571
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,32,1,0,0.9457
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,64,1,0,1.7442
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,128,1,0,3.2855
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,10240,1,0,70.1860
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,3072,1,0,39.3433
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,256,1,0,6.5894
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,512,1,0,12.6758
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,12288,1,0,85.4737
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,1,1,0,0.2486
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,4096,1,0,54.9524
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,16,1,0,0.9473
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,1024,1,0,25.4857
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,32,1,0,1.9448
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,64,1,0,3.2745
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,128,1,0,7.5014
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,1536,1,0,38.3600
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,256,1,0,12.8589
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,6144,1,0,81.4167
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,128,1,1,0,0.2923
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,128,16,1,0,1.7491
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,128,32,1,0,3.2704
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,2048,1,0,51.1691
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,512,1,0,25.3936
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,128,64,1,0,7.5422
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,256,1,1,0,1.1716
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,128,128,1,0,12.6475
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,256,16,1,0,3.4364
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,256,32,1,0,7.5539
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,128,256,1,0,25.3410
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,256,64,1,0,12.8197
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,3072,1,0,78.5917
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,1024,1,0,50.5305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,1,1,0,0.1630
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,16,1,0,0.2009
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,32,1,0,0.2264
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,64,1,0,0.2629
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,128,1,0,0.2736
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,256,1,0,0.3595
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,512,1,0,0.5638
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,1024,1,0,0.8552
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,1536,1,0,1.2081
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,256,128,1,0,25.1023
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,2048,1,0,1.7675
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,3072,1,0,2.3933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,4096,1,0,3.0745
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,6144,1,0,4.6613
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,8192,1,0,6.2548
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,1,1,0,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,128,512,1,0,50.3256
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,10240,1,0,7.9273
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,16,1,0,0.2265
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,12288,1,0,9.7807
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,32,1,0,0.2609
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,64,1,0,0.2684
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,128,1,0,0.3548
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,256,1,0,0.5039
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,512,1,0,0.8345
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,16384,1,0,13.2682
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,1024,1,0,1.5115
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,1536,1,0,2.3939
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,2048,1,0,2.8950
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,256,256,1,0,50.2869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,3072,1,0,4.3753
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,4096,1,0,5.8541
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,6144,1,0,8.8820
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,32768,1,0,29.6572
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,8192,1,0,12.0396
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,1,1,0,0.1755
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,16,1,0,0.2618
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,10240,1,0,15.2995
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,32,1,0,0.2687
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,12288,1,0,18.8277
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,64,1,0,0.3520
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,128,1,0,0.4983
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,256,1,0,0.8236
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,512,1,0,1.4870
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,1024,1,0,2.8098
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,1536,1,0,4.1937
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,16384,1,0,25.7937
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,2048,1,0,5.5422
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,3072,1,0,8.4376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,4096,1,0,11.3900
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,1,1,0,0.1935
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,6144,1,0,17.5793
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,16,1,0,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,32,1,0,0.3525
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,64,1,0,1.2120
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,128,1,0,0.8206
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,256,1,0,1.4768
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,8192,1,0,23.7698
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,512,1,0,2.9668
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,1024,1,0,5.4510
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,32768,1,0,58.0179
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,10240,1,0,30.1965
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,1536,1,0,8.1309
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,2048,1,0,10.8385
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,12288,1,0,36.8364
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,3072,1,0,16.9359
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,1,1,0,0.2000
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,16,1,0,0.3507
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,32,1,0,1.2122
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,16384,1,0,50.5978
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,64,1,0,0.8208
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,128,1,0,1.4645
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,4096,1,0,22.5967
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,256,1,0,3.0132
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,512,1,0,5.3929
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,6144,1,0,34.8626
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,1024,1,0,10.6781
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,1536,1,0,16.2705
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,1,1,0,0.2113
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,32,1,0,0.8185
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,16,1,0,0.5008
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,8192,1,0,47.2298
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,2048,1,0,21.5878
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,64,1,0,1.8039
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,128,1,0,2.7606
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,256,1,0,5.5591
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,10240,1,0,60.0575
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,3072,1,0,33.4400
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,512,1,0,10.6694
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,1,1,0,0.2398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,16,1,0,0.8240
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,32,1,0,1.4713
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,64,1,0,2.9560
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,1024,1,0,21.2199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,4096,1,0,45.1293
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,128,1,0,5.3986
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,256,1,0,10.8091
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,1536,1,0,32.2379
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,128,1,1,0,0.2754
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,128,16,1,0,1.8142
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,512,1,0,21.3287
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,2048,1,0,43.1221
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,128,32,1,0,2.7781
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,128,64,1,0,5.3736
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,128,128,1,0,10.7649
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,256,1,1,0,0.3822
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,1024,1,0,42.4258
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,256,16,1,0,2.7615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,3072,1,0,66.3351
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,128,256,1,0,21.2701
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,256,32,1,0,5.5337
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,1,1,0,0.1558
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,16,1,0,0.1937
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,32,1,0,0.2187
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,256,64,1,0,10.5733
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,64,1,0,0.2564
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,128,1,0,0.2610
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,1536,1,0,63.9941
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,256,1,0,0.3427
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,512,1,0,1.2369
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,1024,1,0,0.7970
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,256,128,1,0,20.9749
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,1536,1,0,1.1103
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,2048,1,0,1.6414
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,3072,1,0,2.1225
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,128,512,1,0,50.2162
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,4096,1,0,2.8180
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,6144,1,0,4.2774
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,8192,1,0,5.7406
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,1,1,0,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,2048,1,0,85.9706
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,10240,1,0,7.4525
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,16,1,0,0.2165
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,32,1,0,0.2591
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,12288,1,0,8.8457
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,64,1,0,0.2594
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,128,1,0,0.3373
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,256,1,0,0.4762
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,256,256,1,0,42.1399
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,16384,1,0,12.1877
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,512,1,0,0.7759
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,1024,1,0,1.3816
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,1536,1,0,2.1996
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,2048,1,0,2.6307
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,3072,1,0,3.9880
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,4096,1,0,5.3507
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,6144,1,0,8.2845
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,1,1,0,0.1793
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,32768,1,0,27.6616
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,8192,1,0,11.0265
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,16,1,0,0.2587
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,32,1,0,0.2554
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,64,1,0,0.3368
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,10240,1,0,14.1686
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,128,1,0,0.4703
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,256,1,0,0.7670
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,512,1,0,1.3541
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,12288,1,0,17.2833
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,1024,1,0,2.5536
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,1536,1,0,3.8200
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,16384,1,0,23.6104
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,2048,1,0,5.0329
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,3072,1,0,7.6803
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,4096,1,0,10.5148
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,1,1,0,0.1754
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,6144,1,0,16.0328
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,16,1,0,0.2568
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,32,1,0,0.3349
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,64,1,0,0.4697
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,128,1,0,1.4242
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,256,1,0,1.3417
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,8192,1,0,21.5557
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,512,1,0,2.5288
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,10240,1,0,27.4881
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,1024,1,0,5.1201
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,32768,1,0,53.7072
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,1536,1,0,7.5174
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,2048,1,0,9.8406
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,12288,1,0,34.0455
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,3072,1,0,15.3725
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,1,1,0,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,16384,1,0,46.5648
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,16,1,0,0.3358
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,32,1,0,0.4694
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,4096,1,0,20.5382
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,64,1,0,0.7593
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,128,1,0,1.3415
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,256,1,0,2.5052
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,512,1,0,4.9151
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,6144,1,0,31.8310
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,1024,1,0,9.8366
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,1536,1,0,14.5715
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,8192,1,0,43.0969
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,1,1,0,0.2075
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,2048,1,0,21.2103
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,16,1,0,0.4710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,32,1,0,0.7627
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,10240,1,0,54.9157
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,64,1,0,1.3398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,128,1,0,2.5017
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,3072,1,0,30.1806
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,256,1,0,4.8809
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,512,1,0,9.7581
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,4096,1,0,41.1580
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,1,1,0,0.2285
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,16,1,0,0.7627
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,32,1,0,1.3353
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,64,1,0,2.6777
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,1024,1,0,22.3413
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,128,1,0,4.8606
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,1536,1,0,29.2136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,256,1,0,9.7527
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,1,1,0,0.2688
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,16384,1,0,93.7449
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,16,1,0,1.3439
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,6144,1,0,64.8532
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,2048,1,0,39.0817
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,32,1,0,2.6865
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,512,1,0,19.2459
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,64,1,0,4.8689
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,256,1,1,0,0.3673
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,256,16,1,0,2.6781
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,128,1,0,9.5626
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,256,32,1,0,4.8631
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,256,1,0,19.0196
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,256,64,1,0,9.7276
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,1024,1,0,38.4041
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,1,1,0,0.1365
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,16,1,0,0.1903
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,32,1,0,0.2109
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,64,1,0,0.2533
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,128,1,0,0.2512
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,256,1,0,0.3344
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,512,1,0,0.4696
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,256,128,1,0,19.1490
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,1024,1,0,0.7649
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,1536,1,0,1.0629
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,2048,1,0,1.3735
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,3072,1,0,2.0336
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,4096,1,0,2.6884
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,512,1,0,38.1967
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,6144,1,0,4.2962
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,8192,1,0,5.6391
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,10240,1,0,6.9572
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,1,1,0,0.1604
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,16,1,0,0.2121
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,12288,1,0,8.5122
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,256,256,1,0,38.0628
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,32,1,0,0.2497
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,64,1,0,0.2524
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,128,1,0,0.3293
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,16384,1,0,11.7508
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,256,1,0,0.4601
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,512,1,0,1.2373
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,1024,1,0,1.3211
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,1536,1,0,1.9126
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,2048,1,0,2.5036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,3072,1,0,3.9673
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,4096,1,0,5.0837
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,6144,1,0,7.9508
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,32768,1,0,26.6390
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,8192,1,0,10.5253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,1,1,0,0.1685
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,1024,1,0,76.7071
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,16,1,0,0.2469
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,10240,1,0,13.4045
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,32,1,0,0.2501
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,64,1,0,0.3291
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,12288,1,0,16.5351
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,128,1,0,0.4557
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,256,1,0,0.7363
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,512,1,0,1.2998
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,1024,1,0,2.6232
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,16384,1,0,22.7450
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,1536,1,0,3.6033
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,2048,1,0,4.7986
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,3072,1,0,7.3193
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,4096,1,0,10.0442
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,6144,1,0,15.1260
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,1,1,0,0.1716
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,16,1,0,0.2485
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,32,1,0,0.3279
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,64,1,0,0.4551
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,128,1,0,0.7313
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,8192,1,0,20.7630
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,256,1,0,1.2848
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,512,1,0,2.4048
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,10240,1,0,26.3666
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,1024,1,0,4.6794
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,32768,1,0,51.7312
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,1536,1,0,7.1744
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,12288,1,0,32.2801
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,2048,1,0,9.5089
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,3072,1,0,14.4561
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,1,1,0,0.1882
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,16,1,0,0.3266
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,32,1,0,0.4547
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,16384,1,0,44.9904
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,64,1,0,0.7314
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,4096,1,0,20.3570
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,128,1,0,1.2842
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,256,1,0,2.3937
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,512,1,0,4.6654
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,6144,1,0,30.2709
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,1024,1,0,9.1616
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,1536,1,0,15.5429
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,8192,1,0,41.0833
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,1,1,0,0.1997
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,2048,1,0,18.4778
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,16,1,0,0.4574
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,32,1,0,0.7315
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,64,1,0,1.2767
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,128,1,0,2.9784
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,3072,1,0,28.8453
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,10240,1,0,52.5015
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,256,1,0,4.6428
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,512,1,0,9.1030
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,1,1,0,0.2279
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,16,1,0,1.2403
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,4096,1,0,39.1231
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,1024,1,0,18.3325
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,32,1,0,1.2785
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,64,1,0,2.5727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,128,1,0,4.6312
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,1536,1,0,27.6699
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,256,1,0,9.0803
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,128,1,1,0,0.2599
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,6144,1,0,60.1910
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,2048,1,0,37.0566
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,512,1,0,18.2032
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,128,16,1,0,1.2849
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,128,32,1,0,2.3786
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,128,64,1,0,4.7707
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,256,1,1,0,0.3597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,128,128,1,0,9.0673
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,256,16,1,0,2.3807
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,1024,1,0,36.4668
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,3072,1,0,57.3121
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,256,32,1,0,4.7657
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,128,256,1,0,18.1862
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,256,64,1,0,9.0933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,1,1,0,0.1382
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,16,1,0,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,32,1,0,0.2111
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,64,1,0,0.2438
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,128,1,0,0.2485
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,256,1,0,0.3291
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,512,1,0,0.4609
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,1024,1,0,0.7530
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,256,128,1,0,17.9261
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,1536,1,0,54.9828
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,1536,1,0,1.0440
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,2048,1,0,1.3486
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,3072,1,0,1.9895
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,4096,1,0,2.7102
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,6144,1,0,3.9797
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,128,512,1,0,44.2057
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,8192,1,0,5.3935
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,10240,1,0,6.8177
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,1,1,0,0.1523
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,12288,1,0,8.2924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,16,1,0,0.2080
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,32,1,0,0.2486
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,64,1,0,0.2469
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,128,1,0,0.3244
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,16384,1,0,11.6017
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,256,256,1,0,36.3195
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,256,1,0,0.4545
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,512,1,0,0.8447
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,1024,1,0,1.2933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,1536,1,0,1.8746
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,2048,1,0,2.4355
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,3072,1,0,3.7208
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,4096,1,0,5.1232
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,6144,1,0,7.5689
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,32768,1,0,26.1516
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,8192,1,0,10.4798
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,1,1,0,0.1552
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,16,1,0,0.2418
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,10240,1,0,13.0695
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,32,1,0,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,64,1,0,0.3231
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,128,1,0,1.2091
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,256,1,0,0.7214
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,12288,1,0,16.0202
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,512,1,0,1.2698
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,1024,1,0,2.3733
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,1536,1,0,3.5218
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,16384,1,0,22.4387
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,2048,1,0,4.6645
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,3072,1,0,7.2962
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,4096,1,0,9.7702
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,1,1,0,0.1635
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,6144,1,0,14.7727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,16,1,0,0.2464
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,32,1,0,0.3233
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,64,1,0,0.4496
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,128,1,0,1.2284
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,256,1,0,1.2577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,8192,1,0,20.2256
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,512,1,0,2.3383
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,1024,1,0,4.7402
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,10240,1,0,25.7934
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,32768,1,0,50.7800
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,1536,1,0,6.9673
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,12288,1,0,31.5599
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,2048,1,0,9.0708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,3072,1,0,14.1093
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,1,1,0,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,16,1,0,0.3243
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,32,1,0,0.4506
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,16384,1,0,43.5969
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,4096,1,0,19.2591
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,64,1,0,0.7134
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,128,1,0,1.2533
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,256,1,0,2.3325
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,512,1,0,4.5216
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,6144,1,0,29.5793
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,1024,1,0,9.1265
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,1536,1,0,13.6389
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,8192,1,0,40.1528
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,2048,1,0,18.0059
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,1,1,0,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,16,1,0,0.4490
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,32,1,0,0.7176
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,64,1,0,1.4734
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,128,1,0,2.3315
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,256,1,0,4.5016
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,10240,1,0,51.1932
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,3072,1,0,28.0787
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,512,1,0,8.8673
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,1,1,0,0.2203
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,16,1,0,0.7189
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,1024,1,0,17.9155
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,4096,1,0,38.1234
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,32,1,0,1.2527
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,64,1,0,2.8817
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,1536,1,0,26.7904
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,128,1,0,4.6671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,256,1,0,11.0786
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,2048,1,0,36.0191
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,128,1,1,0,0.2530
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,6144,1,0,58.7417
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,512,1,0,17.5273
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,128,16,1,0,1.5637
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,128,32,1,0,2.3151
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,128,64,1,0,4.4944
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,128,128,1,0,8.8066
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,3072,1,0,55.7710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,256,1,1,0,0.3571
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,256,16,1,0,2.3243
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,1024,1,0,41.8607
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,128,256,1,0,17.5138
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,256,32,1,0,4.6459
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,1,1,0,0.1301
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,256,64,1,0,8.8160
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,16,1,0,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,32,1,0,0.2080
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,1536,1,0,53.4911
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,64,1,0,0.2452
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,256,1,0,0.3264
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,128,1,0,0.2564
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,128,512,1,0,35.1337
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,512,1,0,0.4573
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,1024,1,0,0.7442
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,1536,1,0,1.2693
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,256,128,1,0,22.1052
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,2048,1,0,1.3316
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,3072,1,0,1.9735
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,4096,1,0,2.6022
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,6144,1,0,4.0010
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,8192,1,0,5.4734
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,2048,1,0,71.8896
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,10240,1,0,6.8883
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,12288,1,0,8.2207
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,1,1,0,0.1482
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,16,1,0,0.2033
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,32,1,0,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,64,1,0,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,128,1,0,0.3223
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,16384,1,0,11.3216
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,256,1,0,1.3619
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,512,1,0,0.8254
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,1024,1,0,1.2810
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,256,256,1,0,43.4271
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,1536,1,0,1.8493
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,2048,1,0,2.4078
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,3072,1,0,3.8362
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,4096,1,0,4.9256
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,6144,1,0,7.4840
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,32768,1,0,25.7160
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,1,1,0,0.1625
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,8192,1,0,10.3281
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,16,1,0,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,32,1,0,0.2444
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,10240,1,0,13.0709
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,64,1,0,0.3192
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,12288,1,0,15.8242
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,128,1,0,0.4456
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,256,1,0,0.7139
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,512,1,0,1.2541
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,1024,1,0,2.5258
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,1536,1,0,3.8840
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,16384,1,0,22.1844
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,2048,1,0,4.6123
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,3072,1,0,7.0473
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,4096,1,0,9.5193
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,6144,1,0,14.7078
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,1,1,0,0.1607
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,16,1,0,0.2398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,32,1,0,0.3192
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,64,1,0,0.4427
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,128,1,0,0.7122
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,8192,1,0,19.9741
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,256,1,0,1.2436
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,512,1,0,2.3084
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,10240,1,0,25.4883
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,1024,1,0,4.5026
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,32768,1,0,50.3266
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,1536,1,0,6.7207
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,2048,1,0,9.1295
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,12288,1,0,31.4376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,3072,1,0,13.8791
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,16384,1,0,43.0905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,1,1,0,0.1825
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,4096,1,0,18.9863
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,16,1,0,0.3198
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,32,1,0,0.5168
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,64,1,0,0.7064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,128,1,0,1.2362
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,256,1,0,2.2932
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,6144,1,0,30.0588
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,512,1,0,4.4542
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,8192,1,0,39.7057
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,1024,1,0,10.3201
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,1536,1,0,13.4405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,10240,1,0,51.4569
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,2048,1,0,17.7502
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,1,1,0,0.1939
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,16,1,0,0.5177
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,32,1,0,0.7099
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,64,1,0,1.2358
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,128,1,0,2.2853
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,12288,1,0,61.9971
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,256,1,0,4.4360
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,3072,1,0,27.7652
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,512,1,0,8.7441
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,1,1,0,0.2172
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,16,1,0,0.7100
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,32,1,0,1.2373
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,1024,1,0,17.4196
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,4096,1,0,37.6846
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,64,1,0,2.2879
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,128,1,0,4.4159
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,16384,1,0,85.9818
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,1536,1,0,26.6073
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,256,1,0,8.9056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,128,1,1,0,0.2533
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,128,16,1,0,1.2414
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,128,32,1,0,2.8578
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,128,64,1,0,4.4252
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,512,1,0,17.2980
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,2048,1,0,35.5564
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,256,1,1,0,0.3509
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,128,128,1,0,11.1446
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,256,16,1,0,2.2923
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,256,32,1,0,5.5721
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,128,256,1,0,17.4950
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,256,64,1,0,8.8597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,1024,1,0,34.9979
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,256,128,1,0,17.2245
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,128,512,1,0,34.7980
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,256,256,1,0,34.5909
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,0,0.2190
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,16,1,0,0.2529
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,32,1,0,0.2653
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,64,1,0,0.2900
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,128,1,0,0.3751
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,256,1,0,0.5249
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,512,1,0,0.8067
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1024,1,0,1.4205
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1536,1,0,2.0467
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,2048,1,0,2.6959
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,3072,1,0,4.2130
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,4096,1,0,5.3131
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,6144,1,0,8.1942
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,0,0.2252
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,16,1,0,0.2644
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,32,1,0,0.2934
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,64,1,0,0.3731
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,128,1,0,0.5257
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,8192,1,0,10.7603
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,256,1,0,1.4129
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,512,1,0,1.4134
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1024,1,0,2.6741
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,10240,1,0,13.4684
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1536,1,0,3.9600
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,2048,1,0,5.3803
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,3072,1,0,7.9768
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,4096,1,0,10.4141
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,12288,1,0,16.2557
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,6144,1,0,15.8974
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,8192,1,0,21.1552
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,0,0.2288
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,10240,1,0,26.8738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,16,1,0,0.2960
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,32,1,0,0.3845
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,128,1,0,0.8054
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,64,1,0,1.3678
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,12288,1,0,32.3777
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,256,1,0,1.4121
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,512,1,0,2.8824
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,16384,1,0,22.0290
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1024,1,0,5.1608
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,16384,1,0,36.7577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1536,1,0,7.8429
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,2048,1,0,10.5444
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,3072,1,0,15.5768
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,32768,1,0,40.0663
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,4096,1,0,20.5894
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,0,0.2386
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,6144,1,0,31.1990
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,32,1,0,0.5296
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,32768,1,0,79.6460
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,16,1,0,0.3739
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,64,1,0,0.8067
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,8192,1,0,35.0023
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,128,1,0,1.4129
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,256,1,0,2.6674
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,512,1,0,5.1438
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,10240,1,0,44.1824
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1024,1,0,10.1090
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,12288,1,0,53.6723
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1536,1,0,15.3070
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,2048,1,0,20.2997
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,3072,1,0,30.4807
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,16384,1,0,73.0839
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,0,0.2506
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,4096,1,0,33.9087
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,16,1,0,0.5359
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,32,1,0,0.8063
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,64,1,0,1.6745
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,128,1,0,2.6661
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,256,1,0,5.1515
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,512,1,0,10.0735
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,6144,1,0,51.5081
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1024,1,0,20.1956
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,8192,1,0,69.4957
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1536,1,0,29.9616
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,2048,1,0,32.9864
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,0,0.2673
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,16,1,0,0.8084
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,32,1,0,1.4175
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,64,1,0,2.6653
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,10240,1,0,88.8765
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,128,1,0,5.1240
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,256,1,0,10.0843
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,3072,1,0,50.0905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,12288,1,0,104.7187
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,512,1,0,19.9158
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,0,0.2881
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,4096,1,0,67.4049
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,16,1,0,1.4129
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,32,1,0,2.6648
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1024,1,0,32.7147
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,64,1,0,5.1242
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,128,1,0,10.2255
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,0,0.3694
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1536,1,0,49.0208
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,256,1,0,20.1299
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,16,1,0,2.6631
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,32,1,0,5.2932
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,512,1,0,32.6190
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,64,1,0,10.0534
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,6144,1,0,100.5205
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,2048,1,0,65.5540
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,0,0.5252
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,128,1,0,19.8843
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,16,1,0,5.3076
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,32,1,0,10.0606
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,256,1,0,32.5659
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,0,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,16,1,0,0.2363
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,32,1,0,0.2492
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,64,1,0,19.8353
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,64,1,0,0.2615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,128,1,0,0.3264
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,256,1,0,0.4484
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1024,1,0,71.1397
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,512,1,0,0.6584
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1024,1,0,1.1587
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1536,1,0,1.6545
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,2048,1,0,2.3811
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,128,1,0,32.5340
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,3072,1,0,3.1915
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,4096,1,0,4.2119
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,6144,1,0,6.3675
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,512,1,0,64.8333
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,8192,1,0,8.5615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,10240,1,0,10.9710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,0,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,16,1,0,0.2404
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,32,1,0,0.2633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,12288,1,0,13.1233
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,64,1,0,0.3290
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,128,1,0,1.3766
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,16384,1,0,17.9633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,256,1,0,0.6578
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,512,1,0,1.1562
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1024,1,0,2.1382
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1536,1,0,3.3166
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,2048,1,0,4.0950
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,256,1,0,64.8300
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,3072,1,0,6.1828
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,4096,1,0,8.4522
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,6144,1,0,12.7197
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,32768,1,0,38.7595
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,0,0.2134
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,8192,1,0,17.0544
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,16,1,0,0.2625
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,32,1,0,0.3227
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,64,1,0,0.4447
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,10240,1,0,21.5485
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,128,1,0,0.6656
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,256,1,0,1.1500
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,512,1,0,2.1297
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1024,1,0,4.0485
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,12288,1,0,26.0437
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1536,1,0,6.0483
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,2048,1,0,8.2206
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,16384,1,0,35.2672
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,3072,1,0,12.3776
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,4096,1,0,16.7150
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,0,0.2268
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,16,1,0,0.3253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,6144,1,0,24.9922
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,32,1,0,1.6185
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,64,1,0,0.7320
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,128,1,0,1.7183
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,8192,1,0,33.6639
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,256,1,0,2.1304
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,512,1,0,4.0360
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,32768,1,0,69.8891
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1024,1,0,8.1664
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,10240,1,0,42.5793
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1536,1,0,12.6722
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,2048,1,0,15.8444
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,12288,1,0,51.5234
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,3072,1,0,24.2820
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,0,0.2342
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,16384,1,0,63.1867
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,16,1,0,0.4461
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,32,1,0,0.6619
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,4096,1,0,32.6282
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,64,1,0,1.1533
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,128,1,0,2.1243
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,256,1,0,4.0339
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,512,1,0,7.9318
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,6144,1,0,49.5031
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1024,1,0,15.7442
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1536,1,0,23.5964
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,8192,1,0,59.6820
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,0,0.2398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,16,1,0,0.6630
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,32,1,0,1.1548
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,2048,1,0,31.6639
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,64,1,0,2.1244
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,10240,1,0,75.6240
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,128,1,0,4.0327
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,256,1,0,7.9098
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,3072,1,0,48.0619
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,512,1,0,15.8541
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,12288,1,0,91.8755
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,0,0.2586
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,4096,1,0,57.5505
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,16,1,0,1.1550
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,32,1,0,2.1232
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1024,1,0,34.2453
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,64,1,0,4.2853
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,128,1,0,7.9107
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,256,1,0,15.9176
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1536,1,0,47.0657
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,0,0.3226
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,16,1,0,2.4287
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,32,1,0,4.0255
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,2048,1,0,55.6204
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,512,1,0,31.3069
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,64,1,0,7.9103
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,0,0.4451
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,128,1,0,15.8214
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,16,1,0,4.0323
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,32,1,0,7.9146
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1024,1,0,55.1504
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,8192,1,0,118.9131
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,256,1,0,31.3113
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,0,0.1994
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,64,1,0,15.8313
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,16,1,0,0.2202
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,32,1,0,0.2352
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,64,1,0,0.2399
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,128,1,0,0.2958
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,256,1,0,0.3894
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,512,1,0,0.5573
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1024,1,0,0.9391
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1536,1,0,1.5838
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,2048,1,0,1.7336
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,3072,1,0,2.5491
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1536,1,0,82.9156
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,128,1,0,31.2255
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,512,1,0,54.8627
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,4096,1,0,3.3670
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,6144,1,0,5.2507
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,8192,1,0,6.8401
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,0,0.1999
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,10240,1,0,8.6238
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,16,1,0,0.2288
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,12288,1,0,10.5586
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,32,1,0,0.2458
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,64,1,0,0.2958
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,16384,1,0,14.5671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,256,1,0,0.5547
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,128,1,0,1.3851
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,512,1,0,0.9310
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1024,1,0,1.7133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,256,1,0,54.8531
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1536,1,0,2.6461
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,2048,1,0,3.2584
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,3072,1,0,5.0726
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,4096,1,0,6.5463
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,32768,1,0,31.8273
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,6144,1,0,9.9536
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,0,0.1978
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,8192,1,0,13.4626
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,16,1,0,0.2460
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,32,1,0,0.2942
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,10240,1,0,17.1014
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,64,1,0,0.3894
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,128,1,0,0.5574
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,256,1,0,0.9299
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,512,1,0,1.7008
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,12288,1,0,20.9492
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1024,1,0,3.5384
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1536,1,0,4.7402
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,2048,1,0,6.3080
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,16384,1,0,28.6836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,3072,1,0,9.5872
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,4096,1,0,12.9236
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,0,0.2118
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,6144,1,0,19.9050
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,16,1,0,0.2979
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,32,1,0,0.3915
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,64,1,0,0.5564
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,128,1,0,0.9283
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,8192,1,0,26.7301
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,256,1,0,1.6983
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,512,1,0,3.3899
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,32768,1,0,63.5122
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,10240,1,0,34.1026
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1024,1,0,6.2264
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1536,1,0,10.1204
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,12288,1,0,41.4665
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,2048,1,0,12.4496
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,3072,1,0,19.1941
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,0,0.2173
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,16384,1,0,56.8874
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,16,1,0,0.3909
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,32,1,0,0.5555
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,4096,1,0,25.8171
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,64,1,0,0.9295
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,128,1,0,1.7021
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,256,1,0,3.1762
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,512,1,0,6.3824
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,6144,1,0,39.4169
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1024,1,0,12.3139
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1536,1,0,18.5332
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,8192,1,0,53.3500
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,0,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,2048,1,0,24.8732
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,16,1,0,0.5571
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,32,1,0,0.9320
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,64,1,0,1.7018
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,10240,1,0,67.7087
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,128,1,0,3.1889
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,256,1,0,6.1853
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,3072,1,0,37.9441
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,512,1,0,12.2509
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,0,0.2433
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,12288,1,0,82.3505
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,16,1,0,0.9345
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,32,1,0,1.6973
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,4096,1,0,51.2177
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,64,1,0,3.1753
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1024,1,0,24.6781
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,128,1,0,6.1760
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,256,1,0,12.4287
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,0,0.2955
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,16,1,0,1.6989
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,32,1,0,3.3747
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1536,1,0,40.1854
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,512,1,0,24.3803
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,64,1,0,6.1765
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,2048,1,0,49.3403
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,0,0.3908
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,128,1,0,12.2100
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,16,1,0,3.1782
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,32,1,0,6.1811
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,256,1,0,24.5208
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,64,1,0,12.2166
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1024,1,0,48.8344
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,0,0.1607
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,16,1,0,0.1972
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,32,1,0,0.2069
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,64,1,0,0.2175
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,128,1,0,0.2259
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,128,1,0,24.2829
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,256,1,0,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,512,1,0,0.3563
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1024,1,0,0.5661
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1536,1,0,0.8294
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,2048,1,0,1.1098
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,3072,1,0,1.7843
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,512,1,0,48.5995
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,4096,1,0,2.4413
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,6144,1,0,3.7553
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,8192,1,0,5.3542
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,10240,1,0,6.6428
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,0,0.1747
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,12288,1,0,8.2294
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,256,1,0,48.5567
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,16,1,0,0.2021
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,16384,1,0,11.5473
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,32,1,0,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,64,1,0,0.2244
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,128,1,0,0.2650
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,256,1,0,0.3467
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,512,1,0,0.5185
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1024,1,0,0.9398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1536,1,0,1.4484
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,2048,1,0,2.0240
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,3072,1,0,3.3439
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,4096,1,0,4.8184
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,32768,1,0,26.3432
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,6144,1,0,7.4394
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,0,0.1850
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1024,1,0,89.9768
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,8192,1,0,10.2105
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,16,1,0,0.2139
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,32,1,0,0.2309
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,10240,1,0,13.1490
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,64,1,0,0.2639
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,128,1,0,0.3358
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,12288,1,0,16.3981
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,256,1,0,0.4962
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,512,1,0,0.8629
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1024,1,0,1.6937
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1536,1,0,2.7371
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,16384,1,0,22.4946
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,2048,1,0,3.9406
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,3072,1,0,6.5213
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,4096,1,0,9.3129
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,32768,1,0,32.1630
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,6144,1,0,14.9956
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,0,0.1929
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,16,1,0,0.2242
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,32,1,0,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,64,1,0,0.3367
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,128,1,0,0.4809
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,8192,1,0,20.3678
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,256,1,0,0.8136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,512,1,0,1.7395
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1024,1,0,3.2706
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,16384,1,0,25.5564
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1536,1,0,5.5045
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,2048,1,0,7.7027
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,10240,1,0,26.6369
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,12288,1,0,32.4006
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,3072,1,0,13.0158
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,0,0.1911
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,4096,1,0,18.6136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,16,1,0,0.2657
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,32,1,0,0.3351
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,64,1,0,0.4820
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,128,1,0,0.7858
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,256,1,0,1.6527
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,8192,1,0,22.0923
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,512,1,0,2.9107
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,6144,1,0,29.7025
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1024,1,0,6.3810
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,10240,1,0,28.6097
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1536,1,0,10.5866
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,0,0.1958
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,16,1,0,0.3359
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,2048,1,0,15.6727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,32,1,0,0.4837
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,64,1,0,0.7863
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,128,1,0,1.3900
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,4096,1,0,20.1295
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,256,1,0,2.7289
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,3072,1,0,26.2717
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,16384,1,0,50.6931
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,512,1,0,5.7137
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,6144,1,0,31.5697
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1024,1,0,12.9879
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,0,0.2080
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,16,1,0,0.4823
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,32,1,0,0.7884
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,64,1,0,1.5937
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,128,1,0,2.6347
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1536,1,0,21.3090
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,256,1,0,5.3366
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,2048,1,0,18.5121
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,0,0.2287
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,16,1,0,0.7913
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,512,1,0,11.4072
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,32,1,0,1.5954
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,64,1,0,2.6250
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,128,1,0,5.1350
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1024,1,0,18.3089
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,0,0.2649
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,16,1,0,1.3977
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,256,1,0,10.5503
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,32,1,0,2.8455
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1536,1,0,27.4566
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,64,1,0,5.1417
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,0,0.1666
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,16,1,0,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,512,1,0,18.1871
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,32,1,0,0.1944
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,64,1,0,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,128,1,0,10.1509
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,128,1,0,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,256,1,0,0.2589
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,512,1,0,0.3386
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1024,1,0,0.5220
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1536,1,0,0.7707
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,2048,1,0,1.0684
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,3072,1,0,1.7219
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,256,1,0,18.1561
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,4096,1,0,2.3536
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,6144,1,0,3.9086
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,8192,1,0,5.1153
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,10240,1,0,6.5729
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,0,0.1623
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,16,1,0,0.1950
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,12288,1,0,8.1980
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,32,1,0,0.2054
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,64,1,0,0.2196
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,128,1,0,0.2505
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,256,1,0,0.3213
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,16384,1,0,11.4103
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,512,1,0,0.4771
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1024,1,0,0.8611
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1536,1,0,1.3437
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,2048,1,0,2.1565
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,3072,1,0,3.2302
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,4096,1,0,4.5440
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,6144,1,0,7.3529
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,32768,1,0,26.2775
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,0,0.1773
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,8192,1,0,10.1709
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,16,1,0,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,32,1,0,0.2181
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,10240,1,0,13.2219
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,64,1,0,0.2533
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,128,1,0,0.3154
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,256,1,0,0.4495
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,512,1,0,0.7754
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1024,1,0,1.5399
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,12288,1,0,16.0227
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1536,1,0,2.5375
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,16384,1,0,22.4785
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,2048,1,0,3.7419
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,3072,1,0,6.3955
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,4096,1,0,9.0547
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,32768,1,0,29.4323
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,6144,1,0,14.4113
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,0,0.1772
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,16,1,0,0.2145
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,32,1,0,0.2509
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,64,1,0,0.3137
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,128,1,0,0.4364
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,256,1,0,0.7232
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,8192,1,0,20.2977
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,512,1,0,1.3515
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,16384,1,0,22.8131
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1024,1,0,2.9595
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,10240,1,0,26.1602
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1536,1,0,4.9678
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,2048,1,0,7.2945
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,12288,1,0,32.4363
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,3072,1,0,12.5133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,0,0.1961
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,4096,1,0,18.1223
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,16,1,0,0.2547
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,32,1,0,0.3169
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,64,1,0,0.4357
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,128,1,0,0.7002
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,256,1,0,1.2580
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,8192,1,0,19.4044
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,512,1,0,2.5667
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1024,1,0,5.9433
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,6144,1,0,29.2548
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,10240,1,0,25.1719
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1536,1,0,9.8634
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,12288,1,0,31.4958
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,0,0.1948
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,2048,1,0,14.5497
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,16,1,0,0.3173
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,32,1,0,0.4392
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,64,1,0,0.6997
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,128,1,0,1.2197
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,256,1,0,2.5548
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,4096,1,0,17.4015
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,512,1,0,5.0123
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,3072,1,0,25.3486
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,6144,1,0,27.4538
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1024,1,0,11.6638
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,0,0.1992
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,16,1,0,0.4385
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,32,1,0,0.6987
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,2048,1,0,15.8009
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,64,1,0,1.2140
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1536,1,0,19.6500
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,128,1,0,2.2814
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,8192,1,0,38.3939
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,256,1,0,4.6059
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,0,0.2163
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,16,1,0,0.7036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,512,1,0,9.9304
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,32,1,0,1.2223
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,64,1,0,2.2835
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1024,1,0,15.5551
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,128,1,0,4.4185
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,0,0.2506
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,16,1,0,1.2253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,256,1,0,9.2542
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,32,1,0,2.2834
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,64,1,0,4.4244
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,512,1,0,15.4363
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,0,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,128,1,0,8.7104
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,16,1,0,0.1814
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,2048,1,0,31.2598
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,32,1,0,0.1954
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,64,1,0,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,128,1,0,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,256,1,0,0.2483
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,512,1,0,0.3258
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1024,1,0,0.5051
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,256,1,0,15.4094
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1536,1,0,0.7397
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,2048,1,0,1.0095
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,3072,1,0,1.6389
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1024,1,0,30.8890
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,4096,1,0,2.2639
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,6144,1,0,3.7700
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,8192,1,0,5.0239
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,0,0.1697
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,16,1,0,0.1896
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,10240,1,0,6.3461
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,32,1,0,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,12288,1,0,7.9288
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,64,1,0,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,128,1,0,0.2439
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,256,1,0,0.3139
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,16384,1,0,10.9135
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,512,1,0,0.4571
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1024,1,0,0.8219
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1536,1,0,1.2804
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,2048,1,0,1.8532
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,3072,1,0,3.1352
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,4096,1,0,4.4154
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,6144,1,0,7.2000
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,32768,1,0,25.4775
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,0,0.1617
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,8192,1,0,9.8007
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,16,1,0,0.2002
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,32,1,0,0.2108
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,64,1,0,0.2466
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,10240,1,0,12.5776
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,12288,1,0,15.7409
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,128,1,0,0.3020
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,256,1,0,0.4322
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,512,1,0,0.7286
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1024,1,0,1.6663
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,16384,1,0,21.6840
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1536,1,0,2.4007
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,2048,1,0,3.5639
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,32768,1,0,28.0754
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,3072,1,0,6.1475
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,4096,1,0,8.6184
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,0,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,6144,1,0,14.1481
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,16,1,0,0.2106
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,32,1,0,0.2479
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,64,1,0,0.3064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,8192,1,0,19.4467
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,128,1,0,0.4178
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,256,1,0,1.4147
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,512,1,0,1.2752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1024,1,0,2.7726
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,16384,1,0,21.4817
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,10240,1,0,25.4083
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1536,1,0,4.7224
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,2048,1,0,7.1503
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,12288,1,0,31.1363
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,3072,1,0,12.1452
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,0,0.1850
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,4096,1,0,17.3126
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,16,1,0,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,32,1,0,0.3052
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,64,1,0,0.4188
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,128,1,0,0.6560
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,256,1,0,1.4291
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,8192,1,0,18.0467
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,512,1,0,2.4013
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1024,1,0,5.4345
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,10240,1,0,23.4815
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,6144,1,0,27.9645
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1536,1,0,9.3610
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,12288,1,0,29.4715
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,2048,1,0,14.0479
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,0,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,16,1,0,0.3056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,32,1,0,0.4188
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,64,1,0,0.6584
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,128,1,0,1.1283
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,4096,1,0,16.0420
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,256,1,0,2.1998
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,512,1,0,4.8789
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,3072,1,0,24.4482
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,0,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1024,1,0,11.0266
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,16,1,0,0.4194
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,32,1,0,0.6593
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,64,1,0,1.1291
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,128,1,0,2.1132
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,2048,1,0,14.4324
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,256,1,0,4.2845
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1536,1,0,18.6815
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,0,0.2095
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,16,1,0,0.6606
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,512,1,0,9.4797
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,32,1,0,1.1329
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,64,1,0,2.1143
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1024,1,0,14.2085
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,128,1,0,4.2800
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,0,0.2420
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,16,1,0,1.1364
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,32,1,0,2.1170
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,256,1,0,8.4042
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,64,1,0,4.2862
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,0,0.1504
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,512,1,0,14.0762
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,16,1,0,0.1815
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,128,1,0,8.2429
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,64,1,0,0.1960
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,32,1,0,0.1892
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,128,1,0,0.2136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,256,1,0,0.2463
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,512,1,0,0.3176
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,256,1,0,14.0560
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1024,1,0,0.4946
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1536,1,0,0.7174
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,2048,1,0,1.0003
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1024,1,0,28.1361
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,3072,1,0,1.6117
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,4096,1,0,2.4373
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,6144,1,0,3.5332
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,0,0.1583
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,8192,1,0,4.9358
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,10240,1,0,6.2405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,16,1,0,0.1919
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,32,1,0,0.1973
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,12288,1,0,7.7281
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,64,1,0,0.2081
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,128,1,0,0.2404
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,256,1,0,0.3081
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,16384,1,0,10.8887
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,512,1,0,0.4447
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1024,1,0,0.8027
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1536,1,0,1.2491
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,2048,1,0,2.0041
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,3072,1,0,3.0678
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,4096,1,0,4.3028
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,6144,1,0,6.8678
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,32768,1,0,25.2209
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,8192,1,0,9.7910
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,0,0.1690
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,10240,1,0,12.5298
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,16,1,0,0.2059
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,32,1,0,0.2093
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,64,1,0,0.2407
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,12288,1,0,15.2708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,128,1,0,0.3000
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,256,1,0,0.4223
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,512,1,0,0.7134
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1024,1,0,1.4110
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,16384,1,0,21.5614
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1536,1,0,2.3367
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,2048,1,0,3.4535
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,3072,1,0,5.9980
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,4096,1,0,8.5972
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,32768,1,0,27.4155
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,6144,1,0,13.6794
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,0,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,16,1,0,0.2081
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,32,1,0,0.2395
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,8192,1,0,19.1373
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,64,1,0,0.2976
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,128,1,0,0.4080
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,256,1,0,0.6616
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,512,1,0,1.2367
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,10240,1,0,24.8965
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1024,1,0,2.7082
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,16384,1,0,20.7877
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1536,1,0,4.5337
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,12288,1,0,30.6327
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,2048,1,0,6.8587
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,3072,1,0,11.9409
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,0,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,16,1,0,0.2397
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,32,1,0,0.2978
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,4096,1,0,16.9310
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,64,1,0,0.4082
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,128,1,0,0.6384
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,256,1,0,1.1332
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,8192,1,0,17.3341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,512,1,0,2.3126
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,10240,1,0,22.6576
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1024,1,0,5.2911
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,6144,1,0,27.7333
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1536,1,0,9.0639
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,0,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,16,1,0,0.3000
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,2048,1,0,13.5419
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,32,1,0,0.4092
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,64,1,0,0.6375
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,128,1,0,1.0862
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,4096,1,0,15.3320
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,256,1,0,2.1225
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,512,1,0,4.5480
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,3072,1,0,23.8675
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,6144,1,0,24.4280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1024,1,0,10.4124
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,0,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,16,1,0,0.4110
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,2048,1,0,13.7641
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,64,1,0,1.4190
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1536,1,0,18.3634
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,128,1,0,2.0270
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,32,1,0,0.6379
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,256,1,0,4.1014
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,3072,1,0,21.9079
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,512,1,0,8.9180
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,0,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,16,1,0,0.6397
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,32,1,0,1.0924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,64,1,0,2.0288
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,0,0.2411
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1024,1,0,13.5159
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,128,1,0,3.9298
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,16,1,0,1.0964
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,32,1,0,2.0345
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,256,1,0,8.2598
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,64,1,0,3.9231
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,512,1,0,13.4167
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,128,1,0,7.8969
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,256,1,0,13.3579
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,0,0.1482
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,16,1,0,0.1775
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,32,1,0,0.1890
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,64,1,0,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1536,1,0,0.7182
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,128,1,0,0.2056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,256,1,0,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,2048,1,0,0.9880
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,512,1,0,0.3113
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,3072,1,0,1.8186
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,4096,1,0,2.2308
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,6144,1,0,3.4931
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1024,1,0,0.4929
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,8192,1,0,4.8140
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,0,0.1646
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,10240,1,0,6.1864
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,16,1,0,0.1850
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,12288,1,0,7.8214
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,32,1,0,0.2003
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,64,1,0,0.2076
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,128,1,0,0.2345
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,16384,1,0,10.7914
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,256,1,0,0.3036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,512,1,0,0.4397
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1024,1,0,0.7966
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1536,1,0,1.4768
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,2048,1,0,1.7715
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,3072,1,0,3.0228
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,4096,1,0,4.2519
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,32768,1,0,25.1981
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,6144,1,0,6.9142
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,0,0.1626
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,8192,1,0,9.6413
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,10240,1,0,12.2855
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,16,1,0,0.1912
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,32,1,0,0.2057
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,64,1,0,0.2376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,12288,1,0,15.3142
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,128,1,0,0.2939
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,256,1,0,0.4147
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,512,1,0,0.6968
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1024,1,0,1.3956
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,16384,1,0,21.1477
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1536,1,0,2.3218
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,2048,1,0,3.4365
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,3072,1,0,6.0147
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,32768,1,0,27.0270
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,4096,1,0,8.5009
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,0,0.1688
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,6144,1,0,13.4827
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,16,1,0,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,32,1,0,0.2381
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,64,1,0,0.2920
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,8192,1,0,18.8355
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,128,1,0,0.4025
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,256,1,0,1.4014
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,512,1,0,1.2032
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,16384,1,0,20.4412
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,10240,1,0,24.4481
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1024,1,0,2.6633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1536,1,0,4.4581
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,2048,1,0,6.7188
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,12288,1,0,30.6514
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,3072,1,0,11.8516
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,0,0.1872
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,32,1,0,0.2932
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,16,1,0,0.2373
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,4096,1,0,16.9165
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,64,1,0,0.4040
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,128,1,0,0.6280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,8192,1,0,16.9931
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,256,1,0,1.1131
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,512,1,0,2.4485
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1024,1,0,5.2061
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,6144,1,0,27.5822
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,10240,1,0,22.2402
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1536,1,0,9.0815
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,0,0.1828
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,2048,1,0,13.3749
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,16,1,0,0.2937
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,32,1,0,0.4016
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,4096,1,0,15.0223
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,64,1,0,0.6266
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,128,1,0,1.0686
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,256,1,0,2.0740
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,3072,1,0,23.4485
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,6144,1,0,23.9016
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,512,1,0,4.4611
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,0,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1024,1,0,10.2496
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,16,1,0,0.4063
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,2048,1,0,13.3863
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,32,1,0,0.6258
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,8192,1,0,33.5964
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,64,1,0,1.0687
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1536,1,0,18.0004
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,128,1,0,1.9840
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,256,1,0,4.1966
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,3072,1,0,21.3703
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,0,0.2025
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,16,1,0,0.6277
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,512,1,0,8.7142
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,32,1,0,1.4049
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,64,1,0,1.9896
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1024,1,0,13.1715
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,128,1,0,4.0190
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,0,0.2364
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,256,1,0,7.8917
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,16,1,0,1.0767
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1536,1,0,19.8069
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,32,1,0,1.9971
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,512,1,0,13.0550
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,64,1,0,3.8361
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,1,1,0,0.2287
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,16,1,0,0.2594
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,128,1,0,7.7031
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,32,1,0,0.2920
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,128,1,0,0.3884
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,64,1,0,0.3396
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,256,1,0,13.0074
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,256,1,0,0.5448
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,512,1,0,0.8423
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,1024,1,0,1.5006
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,1536,1,0,2.2441
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,2048,1,0,2.8421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,3072,1,0,4.2743
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,4096,1,0,5.8752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,512,1,0,25.9207
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,6144,1,0,8.4336
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,8192,1,0,11.6826
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,1,1,0,0.2457
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,16,1,0,0.2905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,10240,1,0,14.6983
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,32,1,0,0.3366
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,64,1,0,0.3806
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,12288,1,0,17.3942
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,128,1,0,1.4272
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,256,1,0,0.8352
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,512,1,0,1.4797
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,1024,1,0,2.7921
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,16384,1,0,23.9907
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,1536,1,0,4.3303
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,2048,1,0,5.6303
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,3072,1,0,8.3305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,4096,1,0,11.4548
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,1,1,0,0.2495
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,6144,1,0,17.0049
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,32768,1,0,42.4258
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,16,1,0,0.3351
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,32,1,0,0.3828
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,8192,1,0,22.8303
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,64,1,0,0.5510
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,128,1,0,0.8307
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,10240,1,0,28.0063
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,256,1,0,1.4683
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,512,1,0,2.7668
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,12288,1,0,33.7642
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,1024,1,0,5.9107
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,1536,1,0,7.9609
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,16384,1,0,38.7936
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,2048,1,0,10.9661
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,3072,1,0,16.1761
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,4096,1,0,21.5638
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,1,1,0,0.2699
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,16,1,0,0.3935
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,32,1,0,1.4337
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,6144,1,0,32.5245
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,64,1,0,0.8705
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,8192,1,0,36.5092
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,128,1,0,1.7341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,256,1,0,2.7523
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,32768,1,0,83.3184
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,512,1,0,5.9870
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,10240,1,0,46.1719
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,1024,1,0,10.6793
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,12288,1,0,56.3905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,1536,1,0,16.2284
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,2048,1,0,21.0197
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,1,1,0,0.2798
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,16384,1,0,76.1887
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,3072,1,0,31.8437
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,16,1,0,0.5614
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,4096,1,0,35.5250
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,32,1,0,0.8275
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,64,1,0,1.4618
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,128,1,0,2.7514
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,256,1,0,5.5005
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,512,1,0,10.8127
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,6144,1,0,54.8164
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,1024,1,0,23.0307
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,1536,1,0,31.2248
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,8192,1,0,72.7704
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,1,1,0,0.3011
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,2048,1,0,34.4441
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,16,1,0,0.8298
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,32,1,0,1.4624
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,64,1,0,2.9645
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,128,1,0,5.4828
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,3072,1,0,54.0414
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,256,1,0,11.3802
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,10240,1,0,92.8796
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,512,1,0,20.7984
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,1,1,0,0.3524
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,16,1,0,1.4650
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,32,1,0,2.7503
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,4096,1,0,71.3441
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,1024,1,0,34.1753
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,64,1,0,5.4879
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,128,1,0,11.4469
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,1536,1,0,51.2684
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,256,1,0,20.7818
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,128,1,1,0,1.3855
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,128,16,1,0,2.9752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,6144,1,0,105.4036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,128,32,1,0,5.8371
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,512,1,0,36.1313
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,128,64,1,0,10.5869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,2048,1,0,68.6530
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,256,1,1,0,1.6922
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,128,128,1,0,22.2020
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,256,16,1,0,5.4967
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,256,32,1,0,10.5767
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,128,256,1,0,33.9514
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,1,1,0,0.2024
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,16,1,0,0.2417
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,256,64,1,0,20.7719
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,32,1,0,0.2710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,64,1,0,0.3172
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,128,1,0,0.3382
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,256,1,0,0.4677
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,512,1,0,1.4551
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,1024,1,0,1.2642
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,1536,1,0,1.8095
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,2048,1,0,2.5911
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,3072,1,0,3.5054
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,4096,1,0,4.8220
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,256,128,1,0,36.7777
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,6144,1,0,6.9916
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,8192,1,0,9.5614
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,128,512,1,0,67.7269
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,10240,1,0,12.0180
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,12288,1,0,14.5058
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,1,1,0,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,4096,1,0,138.3655
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,16,1,0,0.2711
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,32,1,0,0.3134
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,64,1,0,0.3377
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,16384,1,0,19.6543
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,128,1,0,0.4817
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,512,1,0,1.3007
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,256,1,0,0.6955
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,1024,1,0,2.3155
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,256,256,1,0,67.8283
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,1536,1,0,3.5813
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,2048,1,0,4.5150
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,3072,1,0,6.8919
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,4096,1,0,9.1752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,6144,1,0,13.7741
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,1,1,0,0.2216
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,32768,1,0,42.0972
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,8192,1,0,18.2855
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,16,1,0,0.3149
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,32,1,0,0.3479
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,64,1,0,0.4648
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,128,1,0,0.6967
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,10240,1,0,23.2645
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,256,1,0,1.6078
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,512,1,0,2.2876
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,12288,1,0,28.1437
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,1024,1,0,4.3839
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,1536,1,0,6.5176
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,2048,1,0,8.8798
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,16384,1,0,38.2685
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,3072,1,0,13.1333
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,4096,1,0,17.9881
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,1,1,0,0.2174
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,6144,1,0,27.2704
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,16,1,0,0.3428
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,32,1,0,0.4607
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,8192,1,0,36.0375
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,128,1,0,1.3164
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,64,1,0,1.4630
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,32768,1,0,75.3635
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,256,1,0,2.2813
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,10240,1,0,45.7177
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,512,1,0,4.3445
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,1024,1,0,8.7402
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,12288,1,0,55.3933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,1536,1,0,13.7053
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,2048,1,0,18.1143
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,16384,1,0,68.2447
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,3072,1,0,26.6425
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,1,1,0,0.2288
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,4096,1,0,35.1820
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,16,1,0,0.4761
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,32,1,0,0.7461
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,64,1,0,1.2298
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,128,1,0,2.2722
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,256,1,0,4.3421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,6144,1,0,53.3323
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,512,1,0,8.9896
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,1024,1,0,16.8917
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,8192,1,0,65.5983
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,1536,1,0,26.3924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,10240,1,0,81.9671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,2048,1,0,34.1046
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,1,1,0,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,16,1,0,1.4544
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,32768,1,0,149.2603
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,32,1,0,1.3972
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,64,1,0,2.5056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,128,1,0,4.5234
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,12288,1,0,100.4101
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,3072,1,0,51.9192
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,256,1,0,8.5513
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,512,1,0,17.0448
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,1,1,0,0.2727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,16,1,0,1.4069
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,4096,1,0,62.6924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,32,1,0,2.4335
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,64,1,0,4.3310
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,1024,1,0,33.8064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,128,1,0,8.4924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,256,1,0,16.8255
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,1536,1,0,52.1440
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,1,1,0,0.3298
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,16,1,0,2.4357
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,6144,1,0,95.6556
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,32,1,0,4.5140
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,2048,1,0,60.6718
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,64,1,0,8.4954
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,512,1,0,38.0034
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,256,1,1,0,0.4649
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,128,1,0,16.7860
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,256,16,1,0,4.3324
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,256,32,1,0,8.4915
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,1024,1,0,59.9271
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,3072,1,0,92.6388
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,256,1,0,33.4880
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,1,1,0,0.1833
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,16,1,0,0.2285
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,256,64,1,0,18.2292
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,32,1,0,0.2547
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,64,1,0,0.2918
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,128,1,0,0.3046
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,256,1,0,1.4329
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,512,1,0,0.6174
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,1024,1,0,1.0489
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,1536,1,0,1.4440
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,2048,1,0,2.1746
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,3072,1,0,2.8311
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,4096,1,0,3.8419
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,6144,1,0,5.5529
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,256,128,1,0,39.0625
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,512,1,0,64.2656
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,10240,1,0,9.3503
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,8192,1,0,7.5452
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,1,1,0,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,12288,1,0,11.4846
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,16,1,0,0.2561
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,32,1,0,0.2922
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,64,1,0,0.3137
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,128,1,0,0.4035
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,16384,1,0,15.5561
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,256,1,0,0.5774
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,512,1,0,1.4801
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,1024,1,0,2.0550
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,1536,1,0,2.9276
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,2048,1,0,3.6370
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,256,256,1,0,59.6864
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,3072,1,0,5.3793
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,4096,1,0,7.1632
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,32768,1,0,34.2137
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,6144,1,0,10.7707
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,8192,1,0,14.4884
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,1,1,0,0.2053
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,16,1,0,0.2871
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,10240,1,0,18.1441
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,32,1,0,0.3022
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,64,1,0,1.4384
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,128,1,0,0.5749
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,1024,1,0,119.7718
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,256,1,0,1.0426
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,12288,1,0,22.2085
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,512,1,0,2.0302
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,1024,1,0,3.7583
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,1536,1,0,5.4094
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,16384,1,0,30.3498
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,2048,1,0,6.8328
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,3072,1,0,10.2893
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,4096,1,0,14.2570
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,1,1,0,0.2147
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,6144,1,0,20.9607
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,16,1,0,0.3028
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,32,1,0,0.4027
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,8192,1,0,28.1381
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,64,1,0,1.4501
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,128,1,0,0.9753
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,256,1,0,2.0205
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,10240,1,0,35.8647
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,512,1,0,3.8336
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,32768,1,0,66.8448
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,1024,1,0,6.5457
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,12288,1,0,43.5734
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,1536,1,0,9.9973
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,2048,1,0,13.0939
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,3072,1,0,20.3449
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,16384,1,0,59.6201
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,1,1,0,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,16,1,0,0.4003
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,32,1,0,0.5710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,4096,1,0,27.3227
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,64,1,0,1.4949
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,128,1,0,1.7758
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,256,1,0,3.8821
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,512,1,0,6.6793
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,6144,1,0,41.5171
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,1024,1,0,13.1460
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,1536,1,0,19.7069
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,8192,1,0,56.1893
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,2048,1,0,26.2129
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,1,1,0,0.2343
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,16,1,0,0.5751
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,32,1,0,1.1304
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,64,1,0,1.7689
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,128,1,0,3.3431
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,256,1,0,7.0889
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,3072,1,0,41.6274
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,32768,1,0,124.7570
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,512,1,0,14.1378
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,12288,1,0,86.7022
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,4096,1,0,54.1187
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,1,1,0,0.2607
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,16,1,0,1.0683
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,1024,1,0,25.9211
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,32,1,0,1.7771
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,64,1,0,3.3244
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,128,1,0,6.6549
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,1536,1,0,38.9335
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,256,1,0,13.0436
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,6144,1,0,82.6604
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,128,1,1,0,0.3079
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,128,16,1,0,2.0860
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,2048,1,0,52.0395
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,128,32,1,0,3.3295
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,512,1,0,27.7832
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,128,64,1,0,6.6751
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,256,1,1,0,1.3907
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,128,128,1,0,14.2619
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,256,16,1,0,3.7028
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,256,32,1,0,6.6685
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,1024,1,0,51.3737
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,3072,1,0,81.0244
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,128,256,1,0,25.5427
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,1,1,0,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,256,64,1,0,13.0299
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,16,1,0,0.2165
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,32,1,0,0.2435
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,64,1,0,0.2815
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,128,1,0,0.2860
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,256,1,0,0.3866
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,512,1,0,0.5640
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,1024,1,0,0.8788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,1536,1,0,1.2808
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,2048,1,0,1.6096
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,3072,1,0,2.3917
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,4096,1,0,3.1422
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,256,128,1,0,30.5368
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,128,512,1,0,51.1264
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,1536,1,0,80.0525
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,6144,1,0,4.7384
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,8192,1,0,6.3739
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,1,1,0,0.1834
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,16,1,0,0.2426
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,10240,1,0,8.0600
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,12288,1,0,9.7826
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,32,1,0,0.2748
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,64,1,0,0.2949
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,128,1,0,0.4055
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,256,1,0,0.5228
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,16384,1,0,13.4538
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,512,1,0,0.9130
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,1024,1,0,1.5594
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,1536,1,0,2.2677
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,256,256,1,0,51.2984
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,2048,1,0,2.9579
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,3072,1,0,4.4544
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,4096,1,0,6.0387
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,32768,1,0,30.2923
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,6144,1,0,9.0785
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,1,1,0,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,8192,1,0,12.2767
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,32,1,0,0.2919
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,64,1,0,0.4079
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,10240,1,0,15.8065
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,16,1,0,0.2800
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,128,1,0,0.5209
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,256,1,0,0.8477
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,12288,1,0,19.2527
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,512,1,0,1.5337
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,1024,1,0,3.1748
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,16384,1,0,26.1826
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,1536,1,0,4.4318
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,2048,1,0,5.9887
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,3072,1,0,8.6030
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,4096,1,0,11.8390
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,6144,1,0,18.1939
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,16,1,0,0.2853
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,1,1,0,0.2094
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,32,1,0,0.3752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,8192,1,0,24.3254
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,64,1,0,0.5164
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,128,1,0,1.4816
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,256,1,0,1.5177
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,32768,1,0,58.8982
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,10240,1,0,30.7144
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,512,1,0,3.0745
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,1024,1,0,5.5585
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,12288,1,0,37.4296
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,1536,1,0,8.5161
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,2048,1,0,11.0627
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,3072,1,0,17.3103
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,16384,1,0,52.0651
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,1,1,0,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,4096,1,0,23.3416
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,16,1,0,0.4081
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,32,1,0,1.4615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,64,1,0,0.8421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,128,1,0,1.5132
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,256,1,0,2.8596
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,6144,1,0,35.7376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,512,1,0,6.6101
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,1024,1,0,10.8952
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,1536,1,0,16.6779
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,8192,1,0,48.2501
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,1,1,0,0.2244
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,16,1,0,0.6000
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,2048,1,0,22.2600
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,32,1,0,0.8438
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,64,1,0,1.5140
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,128,1,0,2.8341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,10240,1,0,61.2986
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,256,1,0,5.6881
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,3072,1,0,35.5037
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,512,1,0,10.8375
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,12288,1,0,75.6424
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,1,1,0,0.2543
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,4096,1,0,46.8141
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,1024,1,0,21.9420
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,16,1,0,0.8461
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,32,1,0,1.9295
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,64,1,0,2.8320
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,128,1,0,6.7659
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,1536,1,0,36.3865
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,256,1,0,11.0327
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,2048,1,0,44.1595
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,128,1,1,0,0.2908
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,128,16,1,0,1.5188
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,512,1,0,21.8705
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,128,32,1,0,3.3923
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,128,64,1,0,5.4944
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,128,128,1,0,10.8330
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,256,1,1,0,0.4020
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,256,16,1,0,3.3956
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,3072,1,0,67.7729
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,256,32,1,0,5.4897
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,128,256,1,0,21.7980
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,1024,1,0,43.4271
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,256,64,1,0,10.8101
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,1,1,0,0.1712
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,16,1,0,0.2179
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,32,1,0,0.2403
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,64,1,0,0.2690
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,128,1,0,0.2797
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,256,1,0,0.3632
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,512,1,0,0.5047
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,1024,1,0,0.8222
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,1536,1,0,1.1472
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,2048,1,0,1.5732
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,3072,1,0,2.1881
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,4096,1,0,91.9575
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,256,128,1,0,26.1716
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,128,512,1,0,43.3274
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,4096,1,0,2.9012
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,6144,1,0,4.3693
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,8192,1,0,5.8832
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,10240,1,0,7.6776
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,1,1,0,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,16,1,0,0.2362
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,12288,1,0,9.1446
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,32,1,0,0.2691
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,64,1,0,0.2884
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,128,1,0,0.3584
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,256,256,1,0,43.4077
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,16384,1,0,12.7234
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,256,1,0,0.5618
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,512,1,0,0.7989
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,1024,1,0,1.4291
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,1536,1,0,2.0717
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,2048,1,0,2.7068
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,3072,1,0,4.0923
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,4096,1,0,5.4823
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,32768,1,0,28.2594
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,6144,1,0,8.6819
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,8192,1,0,11.4922
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,1,1,0,0.1973
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,16,1,0,0.2711
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,10240,1,0,14.3606
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,32,1,0,0.2711
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,64,1,0,0.3594
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,128,1,0,0.4921
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,12288,1,0,17.5256
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,256,1,0,1.6252
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,512,1,0,1.4080
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,1024,1,0,3.1814
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,1536,1,0,3.8963
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,2048,1,0,5.3542
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,16384,1,0,24.3895
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,3072,1,0,8.4452
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,4096,1,0,10.6356
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,1,1,0,0.2071
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,6144,1,0,16.2816
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,16,1,0,0.2756
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,32,1,0,0.3573
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,8192,1,0,22.3275
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,64,1,0,0.5689
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,128,1,0,1.4756
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,256,1,0,1.3968
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,512,1,0,2.6086
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,32768,1,0,55.0066
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,10240,1,0,28.3870
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,1024,1,0,5.0802
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,1536,1,0,7.7669
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,12288,1,0,34.6371
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,2048,1,0,11.0605
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,3072,1,0,16.4438
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,1,1,0,0.2116
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,16384,1,0,47.8128
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,16,1,0,0.3577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,4096,1,0,21.1301
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,32,1,0,0.4915
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,64,1,0,1.4978
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,128,1,0,1.5453
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,256,1,0,2.6007
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,6144,1,0,32.6029
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,512,1,0,5.0248
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,1024,1,0,10.1113
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,8192,1,0,44.3316
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,1536,1,0,14.9920
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,1,1,0,0.2283
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,16,1,0,1.4498
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,2048,1,0,20.2577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,32,1,0,0.7881
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,10240,1,0,56.3703
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,64,1,0,1.6494
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,128,1,0,2.5953
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,256,1,0,5.0422
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,3072,1,0,31.1190
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,12288,1,0,69.1970
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,512,1,0,10.1142
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,4096,1,0,42.3084
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,1,1,0,0.2458
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,16,1,0,1.4679
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,32,1,0,1.5766
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,1024,1,0,19.9230
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,64,1,0,2.5839
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,128,1,0,5.0028
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,256,1,0,10.0225
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,1536,1,0,33.1707
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,128,1,1,0,0.2851
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,128,16,1,0,1.5821
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,128,32,1,0,2.5844
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,512,1,0,19.8620
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,2048,1,0,41.5120
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,128,64,1,0,4.9982
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,256,1,1,0,0.3909
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,128,128,1,0,10.0438
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,256,16,1,0,2.5847
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,256,32,1,0,5.1898
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,128,256,1,0,19.7616
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,1024,1,0,39.5291
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,256,64,1,0,11.4107
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,8192,1,0,88.3023
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,1,1,0,0.1698
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,16,1,0,0.2097
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,32,1,0,0.2359
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,64,1,0,0.2672
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,128,1,0,0.2730
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,256,1,0,0.3567
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,256,128,1,0,19.7414
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,512,1,0,1.4405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,1024,1,0,1.4676
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,1536,1,0,1.1054
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,2048,1,0,1.4623
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,3072,1,0,2.1880
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,128,512,1,0,39.3033
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,4096,1,0,3.0438
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,6144,1,0,4.3791
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,8192,1,0,5.8123
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,10240,1,0,7.2272
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,1,1,0,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,12288,1,0,8.7125
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,16,1,0,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,32,1,0,0.2643
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,64,1,0,0.2674
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,256,256,1,0,39.2404
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,128,1,0,0.3522
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,16384,1,0,12.0002
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,256,1,0,1.5208
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,512,1,0,0.8268
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,1024,1,0,1.3721
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,1536,1,0,1.9791
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,2048,1,0,2.5891
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,3072,1,0,3.9719
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,4096,1,0,5.4098
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,6144,1,0,7.9725
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,32768,1,0,27.2896
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,8192,1,0,10.8375
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,1,1,0,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,10240,1,0,13.9549
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,16,1,0,0.2695
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,32,1,0,0.2713
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,64,1,0,0.3497
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,128,1,0,0.4802
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,12288,1,0,16.8048
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,256,1,0,0.7651
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,512,1,0,1.4458
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,1024,1,0,3.0693
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,1536,1,0,3.9560
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,2048,1,0,4.9195
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,16384,1,0,23.2171
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,3072,1,0,8.1437
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,4096,1,0,10.5617
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,6144,1,0,15.5805
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,1,1,0,0.1929
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,16,1,0,0.2854
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,32,1,0,0.3493
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,8192,1,0,21.3526
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,64,1,0,0.4792
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,128,1,0,0.7608
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,256,1,0,1.3364
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,32768,1,0,53.0119
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,512,1,0,2.4838
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,10240,1,0,27.2168
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,1024,1,0,5.8340
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,12288,1,0,32.9764
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,1536,1,0,8.1903
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,2048,1,0,9.6174
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,3072,1,0,14.9038
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,16384,1,0,45.7908
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,1,1,0,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,16,1,0,0.3867
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,4096,1,0,20.3711
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,32,1,0,0.5587
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,64,1,0,0.7597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,128,1,0,1.9163
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,256,1,0,2.4700
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,512,1,0,4.7774
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,6144,1,0,31.1881
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,1024,1,0,9.6399
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,8192,1,0,42.3435
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,1536,1,0,14.2582
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,2048,1,0,19.2594
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,1,1,0,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,16,1,0,0.5669
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,32,1,0,0.7627
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,64,1,0,1.5037
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,128,1,0,3.0093
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,10240,1,0,53.9528
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,3072,1,0,29.7036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,256,1,0,5.9653
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,512,1,0,9.3777
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,4096,1,0,42.0355
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,1,1,0,0.2392
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,16,1,0,1.5057
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,32,1,0,1.3308
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,1024,1,0,18.9728
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,64,1,0,2.4616
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,128,1,0,4.7499
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,256,1,0,11.7761
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,1536,1,0,32.2038
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,128,1,1,0,0.2753
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,6144,1,0,61.9528
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,128,16,1,0,1.3345
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,512,1,0,18.5977
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,2048,1,0,38.2304
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,128,32,1,0,3.0395
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,128,64,1,0,6.5152
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,256,1,1,0,0.3840
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,128,128,1,0,9.5553
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,256,16,1,0,3.3332
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,256,32,1,0,4.7482
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,256,64,1,0,9.3361
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,128,256,1,0,18.5768
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,1024,1,0,40.3447
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,1,1,0,0.1748
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,16,1,0,0.2080
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,32,1,0,0.2302
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,256,128,1,0,18.7702
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,64,1,0,0.2592
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,128,1,0,0.2677
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,256,1,0,0.3547
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,512,1,0,0.4857
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,1024,1,0,0.7825
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,1536,1,0,1.0894
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,2048,1,0,1.6568
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,128,512,1,0,46.0307
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,3072,1,0,2.0624
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,4096,1,0,2.7132
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,6144,1,0,4.0894
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,8192,1,0,5.7164
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,10240,1,0,7.1613
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,1,1,0,0.1849
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,12288,1,0,8.5209
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,2048,1,0,82.8102
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,256,256,1,0,47.2290
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,16,1,0,0.2322
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,16384,1,0,11.7357
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,32,1,0,0.2684
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,64,1,0,0.2823
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,128,1,0,0.3488
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,256,1,0,0.5617
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,512,1,0,1.5049
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,1024,1,0,1.6087
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,1536,1,0,2.0094
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,2048,1,0,2.5226
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,3072,1,0,4.0012
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,4096,1,0,5.1139
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,32768,1,0,26.7783
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,6144,1,0,8.0030
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,8192,1,0,10.7568
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,256,512,1,0,74.4590
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,1,1,0,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,10240,1,0,13.8539
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,16,1,0,0.2668
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,32,1,0,0.2698
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,64,1,0,0.3479
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,12288,1,0,16.4506
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,128,1,0,1.4588
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,256,1,0,1.4672
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,512,1,0,1.4333
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,1024,1,0,2.4585
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,16384,1,0,22.7499
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,1536,1,0,3.6216
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,2048,1,0,4.9904
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,3072,1,0,7.9507
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,4096,1,0,10.1295
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,1,1,0,0.2009
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,6144,1,0,15.2091
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,16,1,0,0.2742
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,32,1,0,0.3621
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,8192,1,0,20.6798
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,64,1,0,0.5577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,128,1,0,0.8062
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,256,1,0,1.5822
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,32768,1,0,52.0479
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,10240,1,0,26.8452
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,512,1,0,2.8620
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,1024,1,0,4.7133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,12288,1,0,32.4356
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,1536,1,0,7.0451
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,2048,1,0,9.8693
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,1,1,0,0.2092
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,3072,1,0,14.7511
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,16384,1,0,44.8227
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,16,1,0,0.3619
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,32,1,0,0.4739
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,4096,1,0,20.7403
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,64,1,0,0.7473
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,128,1,0,1.5744
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,256,1,0,2.6310
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,512,1,0,4.6457
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,6144,1,0,30.2554
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,1024,1,0,9.9942
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,8192,1,0,41.4004
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,1536,1,0,14.1039
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,1,1,0,0.2136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,16,1,0,0.4754
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,2048,1,0,20.5771
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,32,1,0,0.7493
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,64,1,0,1.4857
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,10240,1,0,52.7068
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,128,1,0,2.4061
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,3072,1,0,28.9773
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,256,1,0,5.8481
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,512,1,0,9.3181
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,4096,1,0,39.2952
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,1,1,0,0.2373
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,1024,1,0,19.6988
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,16,1,0,1.4671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,32,1,0,1.3040
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,64,1,0,3.0139
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,1536,1,0,27.7684
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,128,1,0,6.1605
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,6144,1,0,62.2324
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,256,1,0,9.1129
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,2048,1,0,40.7999
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,128,1,1,0,0.2782
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,512,1,0,20.3001
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,128,16,1,0,1.3053
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,128,32,1,0,2.4012
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,128,64,1,0,4.6273
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,3072,1,0,57.5485
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,128,128,1,0,10.8206
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,1024,1,0,36.5577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,256,1,1,0,0.3775
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,256,16,1,0,2.6289
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,128,256,1,0,22.5888
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,4096,1,0,78.1633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,256,32,1,0,5.6365
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,256,64,1,0,12.1189
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,1,1,0,0.1747
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,1536,1,0,57.9074
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,16,1,0,0.2076
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,32,1,0,0.2305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,64,1,0,0.2621
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,256,128,1,0,18.2623
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,512,1,0,0.4828
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,128,512,1,0,36.3063
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,256,1,0,0.3582
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,1024,1,0,1.4932
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,128,1,0,0.2649
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,1536,1,0,1.0771
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,2048,1,0,1.3806
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,3072,1,0,2.2689
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,4096,1,0,2.9106
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,6144,1,0,4.0424
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,8192,1,0,5.4573
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,2048,1,0,76.8591
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,10240,1,0,6.9247
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,12288,1,0,8.6031
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,1,1,0,0.1752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,256,256,1,0,36.2690
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,16,1,0,0.2271
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,32,1,0,0.2691
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,64,1,0,0.2768
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,16384,1,0,11.8241
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,128,1,0,0.3451
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,256,1,0,0.5111
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,512,1,0,0.7520
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,1024,1,0,1.4977
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,1536,1,0,2.1024
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,2048,1,0,2.4971
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,3072,1,0,3.7669
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,4096,1,0,5.0511
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,32768,1,0,26.5679
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,6144,1,0,7.9031
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,8192,1,0,10.6349
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,1,1,0,0.1788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,10240,1,0,13.3277
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,16,1,0,0.2615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,32,1,0,0.2641
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,64,1,0,0.3426
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,128,1,0,0.5153
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,12288,1,0,16.2533
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,512,1,0,1.5708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,256,1,0,1.5094
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,1024,1,0,2.7752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,1536,1,0,4.2280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,16384,1,0,22.6416
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,2048,1,0,5.1219
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,3072,1,0,7.6495
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,4096,1,0,9.7890
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,1,1,0,0.1861
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,6144,1,0,15.2305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,16,1,0,0.2772
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,32,1,0,0.3400
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,8192,1,0,20.9785
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,64,1,0,1.5721
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,128,1,0,0.8958
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,256,1,0,1.5344
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,32768,1,0,51.5366
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,10240,1,0,26.2427
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,512,1,0,2.6718
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,1024,1,0,4.8413
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,12288,1,0,32.1058
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,1536,1,0,7.7724
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,2048,1,0,9.2428
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,3072,1,0,15.2237
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,1,1,0,0.2004
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,16384,1,0,44.3075
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,4096,1,0,19.5790
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,16,1,0,0.3750
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,32,1,0,0.4686
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,64,1,0,0.7404
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,128,1,0,1.5787
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,256,1,0,2.8759
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,6144,1,0,30.7996
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,512,1,0,4.8385
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,1024,1,0,9.7075
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,8192,1,0,40.9015
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,1536,1,0,15.5149
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,2048,1,0,18.3515
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,1,1,0,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,16,1,0,0.5492
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,32,1,0,0.8752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,64,1,0,1.5592
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,10240,1,0,52.1057
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,128,1,0,2.3724
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,3072,1,0,28.5624
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,256,1,0,5.7728
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,512,1,0,9.0346
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,4096,1,0,38.8056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,1,1,0,0.2403
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,16,1,0,0.9099
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,1024,1,0,18.2565
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,32,1,0,1.2881
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,64,1,0,2.5788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,128,1,0,4.5597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,1536,1,0,27.4128
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,16384,1,0,88.3357
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,256,1,0,11.4341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,128,1,1,0,0.2688
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,2048,1,0,37.9827
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,128,16,1,0,1.2916
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,128,32,1,0,2.3739
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,512,1,0,18.1459
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,128,64,1,0,5.9437
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,256,1,1,0,1.3977
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,128,128,1,0,9.1726
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,256,16,1,0,2.7595
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,3072,1,0,56.7249
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,256,32,1,0,4.5646
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,128,256,1,0,17.8811
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,256,64,1,0,9.1636
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,0,0.1997
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,1024,1,0,43.0099
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,16,1,0,0.2347
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,32,1,0,0.2465
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,64,1,0,0.2738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,256,128,1,0,18.0172
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,128,1,0,0.3557
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,256,1,0,0.4790
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,128,512,1,0,35.9842
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,512,1,0,2.0856
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1024,1,0,1.3155
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1536,1,0,1.8950
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,2048,1,0,2.7788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,3072,1,0,3.7116
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,4096,1,0,4.9479
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,6144,1,0,7.6263
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,0,0.2075
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,256,256,1,0,35.8142
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,8192,1,0,10.1359
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,10240,1,0,12.5504
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,16,1,0,0.2479
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,32,1,0,0.2728
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,12288,1,0,15.2129
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,64,1,0,0.3431
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,128,1,0,0.4761
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,256,1,0,0.7471
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,512,1,0,1.3131
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,16384,1,0,20.7677
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1024,1,0,2.4720
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1536,1,0,3.8864
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,2048,1,0,4.8199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,3072,1,0,7.4322
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,4096,1,0,9.6808
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,6144,1,0,14.6280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,32768,1,0,37.1440
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,0,0.2198
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,8192,1,0,19.6953
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,16,1,0,0.2731
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,32,1,0,0.3449
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,64,1,0,0.4766
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,10240,1,0,25.0281
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,128,1,0,0.7512
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,256,1,0,1.3110
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,512,1,0,2.4628
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1024,1,0,4.7836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,12288,1,0,30.1707
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,16384,1,0,33.9819
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,2048,1,0,9.6509
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1536,1,0,7.1186
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,3072,1,0,14.2708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,4096,1,0,19.1288
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,0,0.2271
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,16,1,0,0.3441
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,6144,1,0,29.1551
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,32,1,0,0.4775
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,64,1,0,0.7500
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,8192,1,0,32.0036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,128,1,0,1.3118
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,256,1,0,2.4617
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,32768,1,0,73.8285
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,512,1,0,4.7692
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,10240,1,0,40.5276
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1024,1,0,9.5952
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,12288,1,0,49.2617
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1536,1,0,14.0025
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,2048,1,0,18.6692
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,0,0.2339
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,16384,1,0,67.2324
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,3072,1,0,28.3995
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,16,1,0,0.4787
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,32,1,0,0.7522
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,4096,1,0,30.9661
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,64,1,0,1.3111
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,128,1,0,2.4611
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,256,1,0,4.7712
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,512,1,0,9.3401
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,6144,1,0,47.9269
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1024,1,0,18.7155
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1536,1,0,27.8600
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,8192,1,0,63.6211
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,2048,1,0,30.0239
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,0,0.2447
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,16,1,0,0.7529
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,32,1,0,2.1563
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,64,1,0,2.4577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,128,1,0,4.7562
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,3072,1,0,45.6901
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,256,1,0,9.5629
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,10240,1,0,80.6925
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,512,1,0,18.6468
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,0,0.2736
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,16,1,0,2.1829
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,4096,1,0,61.5703
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,32,1,0,2.7579
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1024,1,0,29.7665
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,64,1,0,4.7497
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,128,1,0,9.5128
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1536,1,0,44.6452
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,0,0.3428
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,256,1,0,18.6115
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,16,1,0,2.4553
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,32,1,0,4.9597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,6144,1,0,91.8090
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,64,1,0,9.3163
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,512,1,0,29.6593
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,2048,1,0,59.6504
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,0,0.4792
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,128,1,0,18.5788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,16,1,0,4.7553
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,32,1,0,9.4890
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,256,1,0,29.7360
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1024,1,0,59.1416
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,64,1,0,18.4183
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,0,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,16,1,0,0.2106
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,32,1,0,0.2315
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,64,1,0,0.2485
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,128,1,0,0.3049
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,256,1,0,0.4106
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,512,1,0,0.6167
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1536,1,0,1.5569
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,2048,1,0,2.0148
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,128,1,0,29.5829
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1024,1,0,1.0920
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,3072,1,0,2.9959
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,4096,1,0,3.9713
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,512,1,0,59.0166
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,6144,1,0,6.2248
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,8192,1,0,8.0614
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,0,0.1879
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,10240,1,0,10.1743
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,12288,1,0,12.3365
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,16,1,0,0.2261
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,32,1,0,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,64,1,0,0.3043
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,128,1,0,0.4147
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,16384,1,0,16.8206
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,256,1,0,2.0722
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,256,1,0,58.8801
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,512,1,0,1.0885
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1024,1,0,2.0017
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1536,1,0,2.9174
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,2048,1,0,4.1199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,3072,1,0,5.8052
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,4096,1,0,7.7914
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,32768,1,0,36.8270
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,6144,1,0,11.9884
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,8192,1,0,15.9061
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,0,0.1887
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,16,1,0,0.2464
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,32,1,0,0.3059
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,10240,1,0,20.3173
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,64,1,0,2.0770
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,128,1,0,0.6159
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,256,1,0,1.0844
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,12288,1,0,24.6423
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,512,1,0,1.9951
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1024,1,0,4.0640
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1536,1,0,5.6692
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1024,1,0,127.6762
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,2048,1,0,7.7486
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,16384,1,0,33.5199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,3072,1,0,11.6286
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,4096,1,0,15.5759
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,0,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,6144,1,0,23.5448
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,16,1,0,0.3042
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,32,1,0,2.0761
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,64,1,0,0.6198
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,128,1,0,2.1818
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,32768,1,0,66.0018
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,8192,1,0,31.7545
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,256,1,0,2.3310
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,512,1,0,3.7948
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,10240,1,0,40.1221
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1024,1,0,7.6664
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1536,1,0,11.1974
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,12288,1,0,48.7197
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,2048,1,0,14.8930
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,0,0.2109
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,3072,1,0,22.8756
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,16,1,0,0.4146
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,16384,1,0,59.3419
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,32,1,0,2.1093
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,64,1,0,1.0879
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,128,1,0,2.3232
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,4096,1,0,30.7045
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,256,1,0,3.7977
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,512,1,0,7.6376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,6144,1,0,46.5620
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1024,1,0,14.7598
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,0,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1536,1,0,22.3611
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,16,1,0,2.2942
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,32,1,0,1.0851
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,64,1,0,1.9889
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,2048,1,0,29.7579
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,128,1,0,4.0371
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,256,1,0,7.6423
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,10240,1,0,70.7895
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,512,1,0,14.9600
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,3072,1,0,45.2021
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,0,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,16,1,0,2.1694
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,32,1,0,1.9903
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1024,1,0,29.4984
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,64,1,0,4.0449
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,128,1,0,7.4284
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1536,1,0,44.1290
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,256,1,0,14.6979
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,2048,1,0,51.8273
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,16384,1,0,119.1654
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,0,0.3059
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,16,1,0,2.3066
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,512,1,0,29.3974
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,32,1,0,3.7846
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,64,1,0,7.4361
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,0,0.4385
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,128,1,0,14.6521
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,3072,1,0,79.1845
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,16,1,0,3.7836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,32,1,0,7.4341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1024,1,0,51.3237
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,256,1,0,29.3271
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,0,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,64,1,0,14.6577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,16,1,0,0.1994
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,32,1,0,0.2059
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,64,1,0,0.2258
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,128,1,0,0.2732
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,256,1,0,2.1069
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,512,1,0,0.5225
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,4096,1,0,107.0610
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,128,1,0,29.2427
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1024,1,0,0.8812
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1536,1,0,2.1667
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,512,1,0,51.1222
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,2048,1,0,1.6396
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,3072,1,0,2.4071
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,4096,1,0,3.1868
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,6144,1,0,4.7936
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,8192,1,0,6.7018
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,0,0.1809
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,10240,1,0,8.4146
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,256,1,0,51.0714
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,12288,1,0,9.9717
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,16,1,0,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,32,1,0,0.2209
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,16384,1,0,13.8669
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,64,1,0,0.2757
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,128,1,0,0.3653
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,256,1,0,0.5233
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,512,1,0,0.8780
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1024,1,0,1.6143
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1536,1,0,3.2356
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,2048,1,0,3.4731
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,3072,1,0,4.8869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,32768,1,0,30.4705
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,4096,1,0,6.2060
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,6144,1,0,9.4427
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,0,0.1785
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,8192,1,0,12.7907
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,16,1,0,0.2234
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,32,1,0,0.2731
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,10240,1,0,16.4101
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,64,1,0,0.3646
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,128,1,0,0.5227
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,256,1,0,0.8731
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,12288,1,0,19.9250
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,512,1,0,2.2288
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1024,1,0,3.0273
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,16384,1,0,27.3528
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1536,1,0,4.7405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,2048,1,0,5.9582
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,3072,1,0,9.0958
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,4096,1,0,12.4289
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,512,1,0,117.2411
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,0,0.1858
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,6144,1,0,18.9077
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,16,1,0,0.2732
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,32,1,0,0.3932
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,8192,1,0,25.5086
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,64,1,0,0.5248
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,32768,1,0,60.7924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,128,1,0,0.8764
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,256,1,0,1.6027
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,10240,1,0,32.3967
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,512,1,0,3.3267
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1024,1,0,6.1316
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,12288,1,0,39.4326
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1536,1,0,9.6075
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,2048,1,0,11.7923
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,3072,1,0,18.1852
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,16384,1,0,54.1113
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,0,0.1941
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,4096,1,0,24.4437
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,16,1,0,0.3648
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,32,1,0,0.5250
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,64,1,0,2.1482
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,128,1,0,1.6015
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,6144,1,0,37.3770
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,256,1,0,3.0007
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,512,1,0,6.0910
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1024,1,0,11.8303
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,8192,1,0,50.5710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1536,1,0,17.5015
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,2048,1,0,23.5655
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,10240,1,0,64.2131
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,0,0.2064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,16,1,0,0.5260
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,32,1,0,0.8764
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,64,1,0,2.2206
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,128,1,0,2.9968
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,3072,1,0,35.9619
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,12288,1,0,78.3159
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,256,1,0,5.8477
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,512,1,0,11.7793
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,4096,1,0,48.4877
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,0,0.2220
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,16384,1,0,100.5170
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,16,1,0,0.8789
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1024,1,0,23.3104
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,32,1,0,2.2202
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,64,1,0,3.0031
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,128,1,0,5.8490
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1536,1,0,38.1474
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,256,1,0,11.5716
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,6144,1,0,74.0752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,0,0.2716
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,2048,1,0,46.6003
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,16,1,0,1.6034
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,32,1,0,3.2809
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,512,1,0,23.2076
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,64,1,0,6.9653
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,128,1,0,11.5360
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,0,0.3663
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,16,1,0,3.2772
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1024,1,0,46.0864
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,256,1,0,23.1155
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,32,1,0,6.9801
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,64,1,0,11.5435
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,0,0.1483
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,16,1,0,0.1830
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,32,1,0,0.1871
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,64,1,0,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,128,1,0,0.2069
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,128,1,0,23.1024
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,256,1,0,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,512,1,0,0.3271
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,512,1,0,45.8829
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1536,1,0,69.2398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1536,1,0,0.7553
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,2048,1,0,1.0295
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,3072,1,0,1.6565
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1024,1,0,2.1160
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,4096,1,0,2.5717
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,6144,1,0,3.6035
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,8192,1,0,4.9008
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,10240,1,0,6.3256
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,0,0.1461
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,2048,1,0,85.5488
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,12288,1,0,7.7012
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,16,1,0,0.1847
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,32,1,0,0.1952
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,64,1,0,0.2033
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,16384,1,0,10.9272
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,256,1,0,45.7888
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,128,1,0,0.2363
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,256,1,0,0.3141
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,512,1,0,0.4783
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1024,1,0,0.8593
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1536,1,0,1.3459
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,2048,1,0,1.8879
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,3072,1,0,3.4016
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,4096,1,0,4.4333
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,32768,1,0,25.1020
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,6144,1,0,6.9511
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,8192,1,0,9.8256
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,0,0.1544
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,16,1,0,0.1934
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,10240,1,0,12.4562
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,32,1,0,0.2037
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,64,1,0,0.2370
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,128,1,0,0.3075
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,12288,1,0,15.5787
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,256,1,0,0.4541
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,512,1,0,0.7787
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1024,1,0,1.5649
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1536,1,0,2.7647
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,2048,1,0,3.6229
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,16384,1,0,21.7630
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,3072,1,0,6.1136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,4096,1,0,8.7697
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,32768,1,0,30.0099
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,6144,1,0,14.1080
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,0,0.1664
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,16,1,0,0.2072
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,32,1,0,0.2388
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,64,1,0,0.3065
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,8192,1,0,19.5986
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,128,1,0,0.4404
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,256,1,0,0.7357
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,512,1,0,2.1288
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1024,1,0,2.9956
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,10240,1,0,25.0473
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,16384,1,0,23.3874
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1536,1,0,4.9455
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,12288,1,0,30.7386
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,2048,1,0,7.2464
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,3072,1,0,12.2136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,0,0.1773
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,4096,1,0,17.3597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,16,1,0,0.2391
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,32,1,0,0.3075
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,64,1,0,0.4379
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,8192,1,0,19.9410
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,128,1,0,0.7074
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,256,1,0,1.2955
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,6144,1,0,28.2748
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,10240,1,0,25.8782
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1024,1,0,5.8451
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,512,1,0,2.8875
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,12288,1,0,32.3687
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1536,1,0,9.9270
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,0,0.1869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,16,1,0,0.3041
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,2048,1,0,14.4031
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,32,1,0,0.4416
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,64,1,0,2.3044
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,128,1,0,2.4058
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,4096,1,0,17.9618
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,256,1,0,2.4627
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,512,1,0,5.1715
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,3072,1,0,24.9335
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,6144,1,0,28.3891
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,0,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,16,1,0,0.4427
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1024,1,0,11.6070
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,32,1,0,0.7077
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,64,1,0,1.2478
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,128,1,0,2.3567
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,2048,1,0,16.3832
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1536,1,0,19.6860
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,256,1,0,4.8019
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,0,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,512,1,0,10.5165
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,3072,1,0,25.8465
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,16,1,0,0.7126
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,32,1,0,2.1044
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,64,1,0,2.3553
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1024,1,0,16.1502
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,128,1,0,4.7741
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,0,0.2373
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,16,1,0,1.2544
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,256,1,0,9.6587
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,32,1,0,2.3587
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,64,1,0,4.6061
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,512,1,0,16.0305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,0,0.1521
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,128,1,0,9.2566
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,16,1,0,0.1742
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,2048,1,0,32.4876
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,32,1,0,0.1869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,64,1,0,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,128,1,0,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,256,1,0,0.2330
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,512,1,0,0.2999
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1024,1,0,0.4853
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,256,1,0,15.9803
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1536,1,0,0.7164
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,2048,1,0,1.0011
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,3072,1,0,1.6083
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,4096,1,0,2.2229
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,6144,1,0,3.7990
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,8192,1,0,4.8814
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,10240,1,0,6.3700
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,16,1,0,0.1869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,32,1,0,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,12288,1,0,7.8334
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,0,0.1565
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,64,1,0,0.2011
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,128,1,0,0.2280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,256,1,0,0.2959
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,512,1,0,0.4362
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,16384,1,0,11.0096
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1024,1,0,2.1109
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1536,1,0,1.2556
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,2048,1,0,1.7780
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,3072,1,0,3.0481
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,4096,1,0,4.3618
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,32768,1,0,25.2396
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,6144,1,0,7.0139
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,0,0.1584
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,8192,1,0,9.7056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,16,1,0,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,32,1,0,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,64,1,0,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,10240,1,0,12.5354
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,12288,1,0,15.4694
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,128,1,0,0.2842
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,256,1,0,0.4113
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,512,1,0,0.6973
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1024,1,0,1.4058
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1536,1,0,2.6760
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,16384,1,0,21.4614
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,2048,1,0,3.4764
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,3072,1,0,6.2182
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,4096,1,0,8.6187
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,32768,1,0,27.5159
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,6144,1,0,13.9253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,0,0.1625
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,16,1,0,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,32,1,0,0.2280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,8192,1,0,19.2349
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,64,1,0,0.2840
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,128,1,0,0.3965
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,256,1,0,0.6504
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,512,1,0,1.2296
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1024,1,0,2.9579
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,16384,1,0,20.9102
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,10240,1,0,24.8545
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1536,1,0,4.6001
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,2048,1,0,6.7905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,12288,1,0,30.7901
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,3072,1,0,12.0404
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,0,0.1730
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,4096,1,0,17.1756
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,32,1,0,0.2855
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,16,1,0,0.2284
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,64,1,0,0.3984
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,8192,1,0,17.4558
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,128,1,0,0.6260
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,256,1,0,1.1339
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,512,1,0,2.3252
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,10240,1,0,22.8053
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,6144,1,0,27.6064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1024,1,0,5.2792
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,12288,1,0,28.6539
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1536,1,0,9.2490
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,0,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,16,1,0,0.2850
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,2048,1,0,13.9258
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,32,1,0,0.3969
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,64,1,0,0.6269
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,128,1,0,1.0853
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,4096,1,0,15.4786
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,256,1,0,2.1275
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,512,1,0,4.7305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,3072,1,0,23.9876
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1024,1,0,10.6473
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,6144,1,0,24.6234
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,0,0.1888
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,16,1,0,0.4007
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,32,1,0,0.6296
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,2048,1,0,13.8625
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,64,1,0,1.0870
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,128,1,0,2.5877
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1536,1,0,18.4373
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,256,1,0,4.3376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,3072,1,0,22.0966
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,0,0.1980
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,512,1,0,9.1612
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,16,1,0,0.6311
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,32,1,0,1.0857
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1024,1,0,13.6681
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,64,1,0,2.0395
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,128,1,0,4.1432
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,256,1,0,8.2762
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,0,0.2260
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1536,1,0,20.5011
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,16,1,0,1.0922
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,32,1,0,2.0407
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,512,1,0,13.5476
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,64,1,0,3.9518
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,0,0.1483
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,16,1,0,0.1712
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,128,1,0,7.9381
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,32,1,0,0.1769
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,64,1,0,0.1887
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,128,1,0,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,256,1,0,0.2278
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,512,1,0,0.2947
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1024,1,0,0.4722
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,256,1,0,13.4746
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1536,1,0,0.6852
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,2048,1,0,0.9602
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,3072,1,0,1.5656
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,4096,1,0,2.4664
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,6144,1,0,3.6700
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,8192,1,0,4.7836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,0,0.1584
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,16,1,0,0.1827
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,12288,1,0,7.5844
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,10240,1,0,6.0961
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,32,1,0,0.1932
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,64,1,0,0.2012
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,128,1,0,0.2224
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,256,1,0,0.2839
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,16384,1,0,10.7578
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,512,1,0,0.4219
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1024,1,0,0.7580
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1536,1,0,2.1119
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,2048,1,0,2.2055
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,3072,1,0,2.9181
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,4096,1,0,4.2090
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,32768,1,0,24.3683
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,6144,1,0,6.9384
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,8192,1,0,9.4294
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,0,0.1548
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,10240,1,0,12.1908
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,16,1,0,0.1893
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,32,1,0,0.1974
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,64,1,0,0.2239
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,128,1,0,0.2751
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,12288,1,0,14.9900
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,256,1,0,0.3949
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,512,1,0,0.6581
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1024,1,0,1.3496
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1536,1,0,2.2415
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,16384,1,0,21.0027
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,2048,1,0,3.3330
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,3072,1,0,5.9734
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,4096,1,0,8.2152
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,32768,1,0,26.2588
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,6144,1,0,13.4522
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,0,0.1644
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,16,1,0,0.2011
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,32,1,0,0.2219
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,8192,1,0,18.7569
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,64,1,0,0.2725
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,128,1,0,0.3821
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,256,1,0,0.6161
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,512,1,0,1.1438
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,16384,1,0,19.6673
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,10240,1,0,24.3650
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1024,1,0,2.5456
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1536,1,0,4.3912
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,12288,1,0,30.0708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,2048,1,0,6.7180
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,3072,1,0,11.4855
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,0,0.1815
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,4096,1,0,16.3651
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,16,1,0,0.2233
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,32,1,0,0.2775
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,64,1,0,0.3790
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,8192,1,0,16.2278
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,128,1,0,2.3021
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,256,1,0,1.0527
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,512,1,0,2.1791
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,10240,1,0,21.2653
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1024,1,0,5.1868
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,6144,1,0,26.7296
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,12288,1,0,26.8201
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,0,0.1803
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1536,1,0,8.7059
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,16,1,0,0.2752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,2048,1,0,13.1365
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,32,1,0,0.3831
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,64,1,0,2.1172
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,4096,1,0,14.2276
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,128,1,0,1.0065
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,256,1,0,1.9734
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,512,1,0,4.2390
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,3072,1,0,22.9531
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,6144,1,0,22.7705
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1024,1,0,10.0099
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,0,0.1863
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,16,1,0,0.3841
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,32,1,0,2.2121
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,64,1,0,1.0064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,128,1,0,1.8792
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,2048,1,0,12.6462
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,256,1,0,3.8323
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1536,1,0,17.3715
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,0,0.1952
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,16,1,0,0.5941
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,32,1,0,1.0106
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,512,1,0,8.3710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,64,1,0,1.8873
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,128,1,0,3.6528
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1024,1,0,12.4272
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,0,0.2196
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,256,1,0,7.6783
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,16,1,0,1.0130
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1536,1,0,18.6454
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,32,1,0,1.8896
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,512,1,0,12.3064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,64,1,0,3.8398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,0,0.1360
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,128,1,0,7.1551
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,16,1,0,0.1774
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,32,1,0,0.1806
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,64,1,0,0.1859
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,128,1,0,0.1996
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,256,1,0,0.2241
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,256,1,0,12.2701
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1024,1,0,24.6187
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,512,1,0,0.2889
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1024,1,0,0.4575
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1536,1,0,0.6732
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,2048,1,0,0.9341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,3072,1,0,2.1774
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,4096,1,0,2.1284
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,6144,1,0,3.4061
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,8192,1,0,4.6463
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,0,0.1524
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,16,1,0,0.1754
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,32,1,0,0.1865
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,12288,1,0,7.6179
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,64,1,0,0.1955
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,128,1,0,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,256,1,0,0.2802
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,10240,1,0,6.0666
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,512,1,0,0.4104
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,16384,1,0,10.3505
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1024,1,0,0.7406
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1536,1,0,2.2756
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,2048,1,0,2.1923
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,3072,1,0,2.9088
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,4096,1,0,4.1190
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,32768,1,0,24.0624
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,6144,1,0,6.6755
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,0,0.1516
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,8192,1,0,9.1632
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,10240,1,0,12.1514
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,16,1,0,0.1889
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,32,1,0,0.1994
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,64,1,0,0.2198
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,128,1,0,0.2650
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,12288,1,0,14.8140
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,256,1,0,0.3863
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,512,1,0,0.6491
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1024,1,0,1.3056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1536,1,0,2.1606
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,16384,1,0,20.7568
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,2048,1,0,3.2370
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,3072,1,0,5.6285
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,32768,1,0,25.6704
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,4096,1,0,8.2727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,6144,1,0,13.2133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,0,0.1593
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,16,1,0,0.1951
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,8192,1,0,18.4740
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,32,1,0,0.2201
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,64,1,0,0.2704
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,128,1,0,0.3738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,256,1,0,2.1132
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,16384,1,0,19.0614
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,512,1,0,1.1090
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,10240,1,0,23.8593
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1024,1,0,2.4749
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1536,1,0,4.2628
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,2048,1,0,6.4352
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,12288,1,0,29.4472
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,3072,1,0,11.2677
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,0,0.1671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,16,1,0,0.2202
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,4096,1,0,16.2477
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,8192,1,0,15.5923
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,32,1,0,0.2690
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,64,1,0,0.3728
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,128,1,0,0.5738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,10240,1,0,20.4725
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,256,1,0,1.0149
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,512,1,0,2.1140
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,6144,1,0,26.2649
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1024,1,0,5.0052
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,12288,1,0,25.8990
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1536,1,0,8.4255
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,0,0.1737
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,16,1,0,0.2732
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,32,1,0,0.3748
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,2048,1,0,12.9836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,64,1,0,2.1181
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,128,1,0,0.9716
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,4096,1,0,13.6255
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,256,1,0,1.9007
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,512,1,0,4.2873
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,3072,1,0,22.5371
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,0,0.1827
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1024,1,0,9.5751
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,16,1,0,0.3762
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,32,1,0,0.5745
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,64,1,0,0.9694
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,2048,1,0,12.0332
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,128,1,0,2.3887
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1536,1,0,16.8706
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,256,1,0,3.6791
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,3072,1,0,19.3453
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,512,1,0,8.2837
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,0,0.1973
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,16,1,0,2.0882
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1024,1,0,11.8056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,32,1,0,0.9724
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,4096,1,0,26.9512
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,64,1,0,1.8108
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,128,1,0,3.7034
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,256,1,0,7.3689
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,0,0.2181
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,16,1,0,0.9748
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,32,1,0,1.8151
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,64,1,0,3.7082
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,512,1,0,11.6936
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,2048,1,0,23.8518
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,0,0.1428
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,16,1,0,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,128,1,0,6.8421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,32,1,0,0.1709
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,64,1,0,0.1851
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,128,1,0,0.1913
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,256,1,0,0.2206
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,512,1,0,0.2848
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,256,1,0,11.6427
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1024,1,0,23.3987
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1024,1,0,0.4533
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1536,1,0,0.6703
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,3072,1,0,1.5236
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,2048,1,0,2.3351
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,4096,1,0,2.0931
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,6144,1,0,3.6052
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,8192,1,0,4.6170
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,0,0.1500
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,16,1,0,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,10240,1,0,5.9582
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,32,1,0,0.1829
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,12288,1,0,7.4275
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,64,1,0,0.1919
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,128,1,0,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,16384,1,0,10.4391
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,256,1,0,0.2752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,512,1,0,0.4035
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1024,1,0,0.7311
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1536,1,0,1.1467
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,2048,1,0,1.6745
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,3072,1,0,3.1577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,4096,1,0,4.1177
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,32768,1,0,24.2784
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,6144,1,0,6.5370
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,0,0.1562
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,8192,1,0,9.2993
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,10240,1,0,11.9448
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,16,1,0,0.1810
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,32,1,0,0.1972
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,64,1,0,0.2167
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,128,1,0,0.2621
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,12288,1,0,14.5622
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,256,1,0,0.3812
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1024,1,0,1.2766
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,512,1,0,2.3048
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1536,1,0,2.1294
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,16384,1,0,20.7902
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,2048,1,0,3.2113
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,3072,1,0,5.7786
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,32768,1,0,25.3427
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,4096,1,0,8.1849
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,0,0.1707
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,6144,1,0,12.9127
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,16,1,0,0.1917
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,32,1,0,0.2173
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,64,1,0,0.2676
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,128,1,0,0.3654
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,8192,1,0,18.0886
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,256,1,0,0.5872
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,512,1,0,2.1097
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,16384,1,0,18.7385
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1024,1,0,2.4511
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,10240,1,0,23.8354
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1536,1,0,4.1803
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,2048,1,0,6.5182
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,12288,1,0,29.1643
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,3072,1,0,11.2529
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,0,0.1750
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,16,1,0,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,4096,1,0,15.9386
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,32,1,0,0.2630
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,8192,1,0,15.3039
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,64,1,0,0.3673
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,128,1,0,2.2537
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,256,1,0,0.9975
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,512,1,0,2.0537
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,10240,1,0,20.1064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,6144,1,0,26.2663
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1024,1,0,5.0467
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1536,1,0,8.2845
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,0,0.1749
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,16,1,0,0.2691
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,32,1,0,0.3677
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,2048,1,0,12.7520
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,4096,1,0,13.3261
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,64,1,0,0.5661
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,128,1,0,2.1011
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,256,1,0,1.8647
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,512,1,0,4.2087
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,3072,1,0,22.6020
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1024,1,0,9.4960
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,0,0.1832
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,16,1,0,0.3689
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,32,1,0,0.5659
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,2048,1,0,11.7255
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,64,1,0,0.9546
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,8192,1,0,30.3063
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,128,1,0,1.7798
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1536,1,0,16.6688
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,256,1,0,3.8054
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,0,0.1901
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,16,1,0,0.5647
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,32,1,0,0.9559
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,512,1,0,7.9096
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,64,1,0,2.2835
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1024,1,0,11.4799
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,128,1,0,3.4239
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,0,0.2136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,256,1,0,7.2224
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1536,1,0,17.2696
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,16,1,0,2.0810
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,512,1,0,11.3682
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,32,1,0,1.7804
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,64,1,0,3.4264
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,1,1,0,0.2129
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,128,1,0,6.6924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,16,1,0,0.2449
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,32,1,0,0.2710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1024,1,0,22.7778
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,64,1,0,0.3178
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,256,1,0,0.4939
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,128,1,0,2.1345
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,256,1,0,11.3205
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,512,1,0,0.7879
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,1024,1,0,1.3955
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,1536,1,0,2.0051
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,2048,1,0,2.6400
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,3072,1,0,3.9336
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,512,1,0,22.5473
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,4096,1,0,5.4705
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,6144,1,0,8.0842
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,1,1,0,0.2256
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,8192,1,0,11.0077
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,16,1,0,0.2738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,32,1,0,0.3228
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,10240,1,0,13.5158
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,64,1,0,0.3568
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,128,1,0,2.1249
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,12288,1,0,16.6225
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,256,1,0,0.7776
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,512,1,0,1.3786
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,1024,1,0,2.8869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,16384,1,0,22.5384
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,1536,1,0,4.1057
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,2048,1,0,5.2419
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,3072,1,0,7.5937
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,4096,1,0,10.5249
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,6144,1,0,15.4935
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,32768,1,0,39.4943
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,1,1,0,0.2392
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,8192,1,0,20.7853
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,16,1,0,0.3249
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,64,1,0,0.4871
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,32,1,0,2.1207
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,10240,1,0,26.1135
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,128,1,0,2.1181
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,256,1,0,1.3652
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,512,1,0,2.5615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,12288,1,0,32.5595
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,1024,1,0,5.5687
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,1536,1,0,7.5945
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,16384,1,0,35.6133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,2048,1,0,10.2234
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,3072,1,0,15.0452
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,4096,1,0,20.0738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,1,1,0,0.2547
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,16,1,0,0.3499
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,6144,1,0,31.3197
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,32,1,0,2.1084
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,64,1,0,0.7737
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,8192,1,0,33.5511
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,128,1,0,1.3633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,256,1,0,2.8526
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,512,1,0,5.1314
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,32768,1,0,77.4676
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,10240,1,0,42.4098
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,1024,1,0,10.1237
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,1536,1,0,14.7143
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,12288,1,0,51.5476
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,2048,1,0,19.5512
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,1,1,0,0.2568
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,16,1,0,2.1223
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,32,1,0,2.1452
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,3072,1,0,29.6713
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,64,1,0,1.3603
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,16384,1,0,70.2745
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,4096,1,0,32.5917
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,128,1,0,2.8479
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,256,1,0,5.1412
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,512,1,0,11.0444
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,6144,1,0,49.4752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,1024,1,0,19.3643
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,1,1,0,0.2835
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,16,1,0,0.7745
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,32,1,0,1.3606
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,1536,1,0,29.0374
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,64,1,0,2.5434
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,2048,1,0,31.5471
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,8192,1,0,66.8538
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,128,1,0,5.1716
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,256,1,0,10.0407
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,3072,1,0,48.0391
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,512,1,0,19.3125
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,1,1,0,0.3317
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,16,1,0,2.2053
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,32,1,0,2.5425
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,1024,1,0,31.1764
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,64,1,0,5.1125
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,4096,1,0,64.8051
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,128,1,0,9.8645
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,1536,1,0,46.9381
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,128,1,1,0,2.0492
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,256,1,0,19.3019
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,128,16,1,0,2.8409
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,128,32,1,0,5.1178
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,128,64,1,0,9.8456
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,2048,1,0,62.7065
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,512,1,0,31.0017
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,256,1,1,0,0.6522
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,128,128,1,0,21.4997
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,256,16,1,0,5.1184
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,256,32,1,0,9.8602
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,128,256,1,0,31.1272
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,3072,1,0,93.6230
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,256,64,1,0,19.8939
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,1024,1,0,62.0901
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,1,1,0,0.1768
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,16,1,0,0.2216
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,32,1,0,0.2532
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,64,1,0,0.2952
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,128,1,0,0.3240
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,256,1,0,2.1499
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,512,1,0,0.6593
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,1024,1,0,1.1985
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,1536,1,0,2.2318
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,2048,1,0,2.2322
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,256,128,1,0,30.9905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,3072,1,0,3.6200
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,4096,1,0,4.6996
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,6144,1,0,6.6298
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,128,512,1,0,61.9007
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,8192,1,0,9.1192
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,1,1,0,0.1818
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,10240,1,0,11.2074
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,16,1,0,0.2534
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,32,1,0,0.2963
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,64,1,0,0.3217
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,12288,1,0,13.7579
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,128,1,0,0.4355
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,256,1,0,0.6525
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,512,1,0,1.2332
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,1024,1,0,2.4940
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,16384,1,0,18.6654
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,1536,1,0,3.1910
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,256,256,1,0,61.7128
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,2048,1,0,4.5042
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,3072,1,0,6.3348
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,4096,1,0,8.4968
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,6144,1,0,13.0407
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,1,1,0,0.1920
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,32768,1,0,40.2195
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,8192,1,0,17.5107
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,16,1,0,0.2979
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,32,1,0,0.3182
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,10240,1,0,21.8983
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,256,1,0,1.1707
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,128,1,0,2.2915
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,512,1,0,2.1535
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,64,1,0,2.1276
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,12288,1,0,26.7185
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,1024,1,0,4.1418
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,1536,1,0,6.4089
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,2048,1,0,8.5805
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,16384,1,0,36.2944
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,3072,1,0,12.5812
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,4096,1,0,16.8187
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,1,1,0,0.1994
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,16,1,0,0.3168
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,6144,1,0,25.4620
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,32,1,0,0.4281
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,64,1,0,2.1381
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,128,1,0,1.1629
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,8192,1,0,34.1059
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,256,1,0,2.1433
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,32768,1,0,71.5669
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,512,1,0,4.4087
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,1024,1,0,8.0718
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,10240,1,0,43.3671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,1536,1,0,12.0779
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,2048,1,0,16.0976
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,12288,1,0,52.5245
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,3072,1,0,24.7961
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,16384,1,0,64.3656
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,1,1,0,0.2198
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,16,1,0,0.4300
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,4096,1,0,33.2536
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,32,1,0,2.1336
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,64,1,0,1.1626
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,128,1,0,2.1397
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,256,1,0,4.3550
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,512,1,0,8.2230
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,1024,1,0,15.9409
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,6144,1,0,50.4949
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,8192,1,0,60.9874
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,1536,1,0,24.1871
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,1,1,0,0.2325
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,2048,1,0,32.1717
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,16,1,0,2.1141
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,32,1,0,1.1608
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,64,1,0,2.1369
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,10240,1,0,77.2072
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,128,1,0,4.3485
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,256,1,0,8.2234
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,3072,1,0,49.0071
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,512,1,0,15.8761
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,1,1,0,0.2621
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,16,1,0,2.2071
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,4096,1,0,58.9387
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,32,1,0,2.1348
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,1024,1,0,31.7002
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,64,1,0,4.3295
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,128,1,0,7.9994
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,1536,1,0,47.9308
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,16384,1,0,128.4380
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,256,1,0,15.8672
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,6144,1,0,89.8601
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,128,1,1,0,0.3112
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,128,16,1,0,2.4413
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,2048,1,0,56.7864
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,128,32,1,0,4.0739
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,512,1,0,31.5638
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,128,64,1,0,8.0231
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,256,1,1,0,0.4325
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,256,16,1,0,4.0849
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,128,128,1,0,16.0247
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,256,32,1,0,8.2092
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,1,1,0,0.1618
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,16,1,0,0.2098
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,256,64,1,0,15.8245
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,1024,1,0,56.1231
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,32,1,0,0.2329
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,64,1,0,0.2710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,128,256,1,0,36.1366
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,128,1,0,0.2895
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,256,1,0,0.3829
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,512,1,0,2.1539
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,1024,1,0,2.1632
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,1536,1,0,1.3673
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,2048,1,0,1.7719
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,256,128,1,0,31.4903
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,3072,1,0,2.6154
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,4096,1,0,3.7381
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,6144,1,0,5.5074
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,8192,1,0,7.2649
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,128,512,1,0,55.8951
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,10240,1,0,8.8954
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,1,1,0,0.1780
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,16,1,0,0.2322
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,12288,1,0,10.8043
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,32,1,0,0.2668
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,64,1,0,0.2885
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,128,1,0,0.3789
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,16384,1,0,14.7794
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,256,1,0,0.5461
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,512,1,0,0.9309
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,1024,1,0,1.7178
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,1536,1,0,2.8223
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,2048,1,0,3.5583
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,3072,1,0,4.9311
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,4096,1,0,6.6227
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,256,256,1,0,64.8166
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,6144,1,0,10.0667
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,32768,1,0,32.8819
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,1,1,0,0.1809
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,8192,1,0,13.8219
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,16,1,0,0.2731
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,32,1,0,0.2821
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,10240,1,0,17.4740
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,64,1,0,2.1330
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,128,1,0,0.5432
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,12288,1,0,21.1754
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,256,1,0,0.9211
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,512,1,0,1.6918
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,1024,1,0,3.4818
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,1536,1,0,5.0170
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,2048,1,0,6.3037
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,16384,1,0,28.9912
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,3072,1,0,9.6240
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,4096,1,0,13.1280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,1,1,0,0.1888
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,16,1,0,0.2816
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,6144,1,0,20.1791
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,32,1,0,0.3764
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,64,1,0,0.5411
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,128,1,0,0.9156
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,256,1,0,1.6799
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,8192,1,0,26.9166
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,512,1,0,3.1718
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,1024,1,0,6.2035
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,32768,1,0,64.1320
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,10240,1,0,34.1839
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,1536,1,0,9.5114
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,2048,1,0,12.6373
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,12288,1,0,41.5245
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,3072,1,0,19.3285
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,1,1,0,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,16384,1,0,56.9715
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,16,1,0,0.3775
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,32,1,0,0.5404
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,64,1,0,2.1710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,4096,1,0,25.9436
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,128,1,0,1.9406
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,256,1,0,3.1800
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,512,1,0,6.1648
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,6144,1,0,39.3554
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,1024,1,0,12.4185
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,1536,1,0,18.4608
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,8192,1,0,53.5262
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,1,1,0,0.2159
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,2048,1,0,24.6758
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,16,1,0,0.5421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,32,1,0,2.1502
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,64,1,0,2.2658
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,128,1,0,3.1580
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,10240,1,0,67.7860
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,3072,1,0,38.0202
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,256,1,0,6.1533
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,512,1,0,12.4262
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,1,1,0,0.2414
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,16,1,0,0.9170
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,4096,1,0,51.4676
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,32,1,0,2.3672
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,1024,1,0,24.5169
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,64,1,0,3.1473
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,128,1,0,6.1340
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,1,1,0,0.2866
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,1536,1,0,36.6677
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,256,1,0,12.4337
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,16,1,0,1.6762
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,32,1,0,3.4293
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,64,1,0,6.1520
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,512,1,0,24.4305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,2048,1,0,49.2825
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,6144,1,0,78.5835
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,128,1,0,12.3424
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,256,1,1,0,0.4045
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,256,16,1,0,3.4483
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,256,32,1,0,6.1363
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,256,1,0,24.3498
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,256,64,1,0,12.3427
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,1024,1,0,48.6908
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,1,1,0,0.1667
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,16,1,0,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,32,1,0,0.2242
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,64,1,0,0.2642
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,256,128,1,0,24.3399
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,128,1,0,0.2627
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,256,1,0,0.3549
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,512,1,0,0.5061
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,1024,1,0,0.8375
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,1536,1,0,2.1883
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,512,1,0,48.4340
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,2048,1,0,1.5319
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,3072,1,0,2.2786
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,4096,1,0,3.0026
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,6144,1,0,4.8139
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,8192,1,0,6.3738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,256,256,1,0,48.3781
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,10240,1,0,7.7185
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,12288,1,0,9.3828
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,1,1,0,0.1724
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,16,1,0,0.2269
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,32,1,0,0.2551
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,16384,1,0,13.1334
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,64,1,0,0.2615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,128,1,0,2.1159
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,256,1,0,0.5624
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,512,1,0,0.8146
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,1024,1,0,1.4781
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,1536,1,0,2.1550
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,2048,1,0,2.8109
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,3072,1,0,4.5905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,1024,1,0,89.7236
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,4096,1,0,5.9943
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,32768,1,0,29.1771
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,6144,1,0,8.6608
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,1,1,0,0.1808
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,16,1,0,0.2553
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,8192,1,0,11.9539
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,10240,1,0,14.9446
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,32,1,0,0.2647
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,256,512,1,0,89.4343
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,64,1,0,0.3515
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,12288,1,0,18.4052
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,128,1,0,0.4914
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,256,1,0,2.4051
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,512,1,0,1.4535
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,1024,1,0,2.7409
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,1536,1,0,4.0558
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,16384,1,0,25.2704
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,2048,1,0,5.3997
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,3072,1,0,8.4894
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,4096,1,0,11.2707
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,6144,1,0,17.2166
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,1,1,0,0.1819
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,16,1,0,0.2631
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,32,1,0,0.3479
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,8192,1,0,23.2530
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,64,1,0,2.1160
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,128,1,0,2.1297
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,32768,1,0,56.7768
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,256,1,0,1.4379
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,10240,1,0,29.5259
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,512,1,0,2.9958
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,1024,1,0,5.2847
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,12288,1,0,35.9850
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,1536,1,0,7.8999
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,2048,1,0,10.7277
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,3072,1,0,16.5288
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,16384,1,0,49.6091
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,1,1,0,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,16,1,0,0.3518
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,4096,1,0,22.0290
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,32,1,0,0.4931
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,64,1,0,0.8021
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,128,1,0,2.2088
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,256,1,0,3.0108
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,6144,1,0,33.9733
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,512,1,0,5.2278
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,1024,1,0,10.5706
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,8192,1,0,46.0886
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,1536,1,0,15.9131
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,2048,1,0,20.9851
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,1,1,0,0.2114
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,10240,1,0,58.5896
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,32,1,0,0.8036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,16,1,0,2.1069
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,64,1,0,1.4371
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,128,1,0,3.0050
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,3072,1,0,32.4998
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,256,1,0,5.5337
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,12288,1,0,71.6491
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,512,1,0,10.3562
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,4096,1,0,44.0555
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,1,1,0,0.2366
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,16,1,0,0.8064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,1024,1,0,20.6405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,32,1,0,1.4337
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,64,1,0,2.9780
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,128,1,0,5.2192
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,1536,1,0,31.3377
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,256,1,0,10.2874
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,6144,1,0,67.5744
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,128,1,1,0,0.2711
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,2048,1,0,41.9987
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,512,1,0,20.5075
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,128,16,1,0,1.4361
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,128,32,1,0,2.6889
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,128,64,1,0,5.2239
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,128,128,1,0,10.4603
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,256,1,1,0,0.3812
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,256,16,1,0,2.6935
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,1024,1,0,41.2886
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,256,32,1,0,5.4781
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,128,256,1,0,20.7903
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,256,64,1,0,10.4735
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,1,1,0,0.1666
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,16,1,0,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,32,1,0,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,64,1,0,0.2566
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,128,1,0,0.2527
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,1536,1,0,62.2679
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,256,128,1,0,20.4278
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,256,1,0,0.3380
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,512,1,0,2.1103
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,1024,1,0,0.7828
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,128,512,1,0,41.0742
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,4096,1,0,87.6506
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,1536,1,0,1.0965
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,2048,1,0,1.4132
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,3072,1,0,2.4912
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,4096,1,0,3.0749
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,6144,1,0,4.1866
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,8192,1,0,5.6359
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,1,1,0,0.1628
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,10240,1,0,7.1525
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,16,1,0,0.2230
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,12288,1,0,8.7142
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,32,1,0,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,256,256,1,0,41.0034
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,64,1,0,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,16384,1,0,12.2286
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,128,1,0,0.3403
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,256,1,0,0.4690
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,512,1,0,0.7602
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,1024,1,0,1.3613
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,1536,1,0,1.9759
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,2048,1,0,2.5782
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,3072,1,0,4.3799
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,4096,1,0,5.2330
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,6144,1,0,7.9832
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,32768,1,0,27.2937
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,1,1,0,0.1686
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,8192,1,0,11.0335
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,16,1,0,0.2568
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,10240,1,0,13.8037
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,32,1,0,0.2525
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,64,1,0,0.3347
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,12288,1,0,17.0114
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,128,1,0,0.4656
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,256,1,0,2.1411
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,512,1,0,1.3332
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,1024,1,0,2.5078
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,1536,1,0,4.0336
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,16384,1,0,23.2465
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,2048,1,0,5.3356
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,3072,1,0,7.5120
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,4096,1,0,10.1726
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,6144,1,0,15.7743
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,1,1,0,0.1789
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,16,1,0,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,8192,1,0,21.4115
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,32,1,0,0.3344
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,64,1,0,0.4656
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,128,1,0,0.7498
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,10240,1,0,27.1802
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,256,1,0,1.3203
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,32768,1,0,53.1903
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,512,1,0,2.8045
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,1024,1,0,4.8280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,12288,1,0,33.0579
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,1536,1,0,7.1952
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,2048,1,0,9.6245
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,3072,1,0,15.0869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,1,1,0,0.2003
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,16384,1,0,45.8758
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,16,1,0,0.3349
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,4096,1,0,21.1135
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,32,1,0,0.4637
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,64,1,0,0.7484
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,128,1,0,1.3162
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,256,1,0,2.9082
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,6144,1,0,31.2511
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,512,1,0,5.0372
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,1024,1,0,9.4609
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,8192,1,0,42.4274
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,1536,1,0,14.4603
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,2048,1,0,19.0847
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,1,1,0,0.2055
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,16,1,0,0.4662
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,32,1,0,0.9000
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,64,1,0,1.6179
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,10240,1,0,53.9920
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,128,1,0,2.4545
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,256,1,0,4.7781
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,3072,1,0,29.7580
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,512,1,0,9.6103
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,1,1,0,0.2282
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,4096,1,0,41.9060
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,16,1,0,2.1408
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,1024,1,0,18.7788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,32,1,0,1.3151
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,64,1,0,2.7454
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,128,1,0,5.0149
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,1536,1,0,28.3937
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,256,1,0,9.5723
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,16384,1,0,91.3976
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,128,1,1,0,0.2633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,128,16,1,0,1.3186
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,2048,1,0,38.2373
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,128,32,1,0,2.4563
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,512,1,0,22.6446
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,128,64,1,0,6.1540
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,128,128,1,0,9.5476
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,256,1,1,0,2.0566
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,3072,1,0,59.0714
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,256,16,1,0,3.0851
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,128,256,1,0,18.6295
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,256,32,1,0,5.0315
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,1024,1,0,37.6542
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,1,1,0,0.1604
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,16,1,0,0.2035
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,256,64,1,0,11.9143
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,32,1,0,0.2218
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,64,1,0,0.2588
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,128,1,0,0.2524
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,256,1,0,2.1215
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,128,512,1,0,37.3859
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,512,1,0,2.1283
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,256,128,1,0,18.5445
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,1024,1,0,0.7600
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,1536,1,0,1.0616
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,2048,1,0,2.4060
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,3072,1,0,2.0121
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,4096,1,0,2.6525
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,6144,1,0,4.3064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,8192,1,0,5.4044
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,10240,1,0,6.8703
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,12288,1,0,8.3806
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,1,1,0,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,16,1,0,0.2199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,16384,1,0,11.8273
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,32,1,0,0.2521
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,64,1,0,0.2505
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,2048,1,0,82.3710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,256,256,1,0,45.9063
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,128,1,0,0.3307
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,256,1,0,0.5286
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,512,1,0,0.7394
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,1024,1,0,1.3179
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,1536,1,0,2.5474
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,2048,1,0,2.4600
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,3072,1,0,4.0461
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,4096,1,0,5.0104
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,6144,1,0,7.6466
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,32768,1,0,26.4016
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,8192,1,0,10.3890
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,1,1,0,0.1852
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,16,1,0,0.2513
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,10240,1,0,13.4651
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,32,1,0,0.2600
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,64,1,0,0.3328
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,128,1,0,0.4563
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,12288,1,0,16.3240
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,256,1,0,0.7292
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,512,1,0,2.2042
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,1024,1,0,2.3945
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,1536,1,0,3.5427
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,2048,1,0,4.7031
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,16384,1,0,22.4932
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,3072,1,0,7.1893
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,4096,1,0,9.7187
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,6144,1,0,15.1129
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,1,1,0,0.1908
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,16,1,0,0.2506
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,8192,1,0,20.8300
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,32,1,0,0.3306
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,64,1,0,2.1226
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,128,1,0,0.7259
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,256,1,0,1.2733
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,32768,1,0,51.1787
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,10240,1,0,25.9009
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,512,1,0,2.8775
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,1024,1,0,4.5982
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,12288,1,0,31.8648
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,1536,1,0,7.1317
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,2048,1,0,9.1797
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,3072,1,0,14.4653
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,16384,1,0,44.0145
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,4096,1,0,19.4161
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,1,1,0,0.1879
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,16,1,0,0.3305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,32,1,0,0.4517
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,128,1,0,1.2658
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,64,1,0,2.1393
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,6144,1,0,29.6599
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,256,1,0,2.8070
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,512,1,0,4.5431
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,1024,1,0,9.2036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,8192,1,0,40.5696
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,1536,1,0,13.6022
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,2048,1,0,18.3701
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,1,1,0,0.1998
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,10240,1,0,51.7264
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,16,1,0,0.4536
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,32,1,0,0.7229
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,64,1,0,1.2663
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,3072,1,0,28.3211
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,128,1,0,2.6527
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,12288,1,0,63.3540
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,256,1,0,4.5401
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,512,1,0,9.2373
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,4096,1,0,38.5050
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,1,1,0,0.2268
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,16,1,0,0.8778
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,1024,1,0,18.0300
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,32,1,0,2.2529
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,16384,1,0,87.7087
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,64,1,0,2.3398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,128,1,0,4.8129
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,1536,1,0,27.2089
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,256,1,0,8.9161
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,128,1,1,0,0.2648
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,2048,1,0,36.3531
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,128,16,1,0,1.2673
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,128,32,1,0,2.9629
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,512,1,0,17.9133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,128,64,1,0,6.1196
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,128,128,1,0,9.1093
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,256,1,1,0,2.0605
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,256,16,1,0,2.6407
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,8192,1,0,80.8936
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,1024,1,0,35.8664
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,128,256,1,0,17.7340
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,256,32,1,0,4.8041
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,1,1,0,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,256,64,1,0,9.1079
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,16,1,0,0.1975
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,32,1,0,0.2197
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,64,1,0,0.2509
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,128,1,0,0.2524
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,1536,1,0,53.8870
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,128,512,1,0,35.5476
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,256,1,0,0.3311
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,512,1,0,0.4597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,256,128,1,0,22.3063
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,1024,1,0,2.1352
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,1536,1,0,1.0416
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,2048,1,0,1.3347
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,3072,1,0,1.9738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,4096,1,0,2.6786
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,6144,1,0,4.2473
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,8192,1,0,5.5867
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,10240,1,0,7.0101
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,256,256,1,0,35.4959
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,12288,1,0,8.1826
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,1,1,0,0.1706
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,16,1,0,0.2198
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,32,1,0,0.2544
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,64,1,0,0.2507
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,16384,1,0,11.5539
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,128,1,0,0.3247
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,256,1,0,0.4529
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,1024,1,0,1.2830
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,512,1,0,2.1484
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,1536,1,0,1.8510
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,2048,1,0,2.7730
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,3072,1,0,3.9421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,4096,1,0,4.8955
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,32768,1,0,25.7105
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,6144,1,0,7.7030
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,8192,1,0,10.1345
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,1,1,0,0.1701
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,10240,1,0,12.9534
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,16,1,0,0.2528
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,256,512,1,0,70.9243
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,32,1,0,0.2509
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,64,1,0,0.3227
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,12288,1,0,16.0221
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,128,1,0,0.4495
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,256,1,0,2.1445
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,16384,1,0,22.0303
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,512,1,0,1.2582
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,1024,1,0,2.7099
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,1536,1,0,3.4584
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,2048,1,0,4.5786
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,3072,1,0,7.2835
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,4096,1,0,9.6814
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,6144,1,0,14.5752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,1,1,0,0.1808
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,16,1,0,0.2524
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,64,1,0,0.4484
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,8192,1,0,19.8119
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,32,1,0,2.1301
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,128,1,0,2.1395
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,32768,1,0,50.3040
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,10240,1,0,25.4576
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,256,1,0,1.2442
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,512,1,0,2.6217
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,1024,1,0,4.7757
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,12288,1,0,30.9754
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,1536,1,0,6.7010
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,2048,1,0,8.9376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,3072,1,0,14.0952
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,16384,1,0,43.0878
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,1,1,0,0.1883
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,4096,1,0,18.9594
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,16,1,0,0.3251
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,32,1,0,0.4471
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,64,1,0,0.7129
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,128,1,0,1.2393
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,256,1,0,2.2860
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,6144,1,0,29.1109
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,512,1,0,4.4431
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,1024,1,0,8.7636
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,8192,1,0,39.6706
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,1536,1,0,13.4463
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,1,1,0,0.2036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,2048,1,0,17.7267
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,16,1,0,2.1272
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,32,1,0,0.7125
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,10240,1,0,50.5628
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,64,1,0,1.2396
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,128,1,0,2.5905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,256,1,0,4.7388
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,3072,1,0,27.6441
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,512,1,0,8.7124
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,1,1,0,0.2244
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,4096,1,0,37.6350
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,16,1,0,0.7125
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,32,1,0,1.2390
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,1024,1,0,17.4537
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,64,1,0,2.5830
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,128,1,0,4.4064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,1536,1,0,26.4508
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,256,1,0,8.9068
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,128,1,1,0,0.2578
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,16384,1,0,85.9145
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,128,16,1,0,1.2423
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,128,32,1,0,2.7001
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,512,1,0,17.4956
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,2048,1,0,38.7691
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,128,64,1,0,4.4132
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,128,128,1,0,8.8700
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,256,1,1,0,0.3587
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,256,16,1,0,2.5902
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,256,32,1,0,4.6823
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,1024,1,0,34.9654
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,128,256,1,0,17.2752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,256,64,1,0,8.6669
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,1,1,0,0.1585
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,16,1,0,0.1971
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,32,1,0,0.2158
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,128,1,0,0.2506
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,64,1,0,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,256,1,0,0.3254
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,512,1,0,0.4552
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,256,128,1,0,17.3976
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,1536,1,0,1.0298
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,1024,1,0,2.1562
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,2048,1,0,1.3199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,3072,1,0,1.9586
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,128,512,1,0,34.5532
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,4096,1,0,2.8833
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,6144,1,0,3.8857
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,8192,1,0,5.2419
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,1,1,0,0.1586
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,10240,1,0,6.9499
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,12288,1,0,8.1244
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,256,256,1,0,34.5296
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,16,1,0,0.2155
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,32,1,0,0.2465
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,16384,1,0,11.2173
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,64,1,0,0.2454
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,128,1,0,0.3535
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,256,1,0,0.4482
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,512,1,0,0.7208
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,1024,1,0,1.2687
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,1536,1,0,2.2603
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,2048,1,0,2.5733
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,3072,1,0,3.5995
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,4096,1,0,4.8456
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,32768,1,0,25.6651
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,6144,1,0,7.6388
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,8192,1,0,10.4257
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,1,1,0,0.1677
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,10240,1,0,12.9730
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,16,1,0,0.2510
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,32,1,0,0.2489
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,64,1,0,0.3203
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,12288,1,0,15.8111
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,128,1,0,0.4458
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,512,1,0,1.2424
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,1024,1,0,2.3082
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,1536,1,0,3.4115
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,256,1,0,2.1547
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,16384,1,0,21.6260
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,2048,1,0,4.5325
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,3072,1,0,7.1641
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,4096,1,0,9.5860
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,1,1,0,0.1756
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,6144,1,0,14.5572
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,16,1,0,0.2442
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,32,1,0,0.3171
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,8192,1,0,19.6123
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,64,1,0,2.3567
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,128,1,0,0.7074
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,256,1,0,1.2312
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,32768,1,0,49.8398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,10240,1,0,25.2183
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,512,1,0,2.2786
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,1024,1,0,4.6985
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,12288,1,0,30.8146
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,1536,1,0,6.5991
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,2048,1,0,9.0479
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,3072,1,0,13.7303
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,1,1,0,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,16384,1,0,42.6642
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,16,1,0,0.3200
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,4096,1,0,18.7626
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,32,1,0,0.4417
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,64,1,0,0.7026
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,128,1,0,1.2271
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,256,1,0,2.2637
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,6144,1,0,28.6325
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,512,1,0,4.3781
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,1024,1,0,8.9036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,1536,1,0,13.2759
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,8192,1,0,39.1651
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,1,1,0,0.2023
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,2048,1,0,17.6809
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,16,1,0,0.4443
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,32,1,0,0.7046
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,64,1,0,2.3140
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,128,1,0,2.5585
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,10240,1,0,49.9577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,3072,1,0,27.1712
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,256,1,0,4.6624
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,512,1,0,8.5960
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,1,1,0,0.2260
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,16,1,0,0.7064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,32,1,0,1.2263
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,4096,1,0,37.1849
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,1024,1,0,17.4111
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,64,1,0,2.2577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,128,1,0,5.5670
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,1536,1,0,26.1735
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,256,1,0,8.5676
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,128,1,1,0,0.2605
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,2048,1,0,35.0176
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,128,16,1,0,1.5348
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,128,32,1,0,2.2546
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,512,1,0,20.7932
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,128,64,1,0,4.6327
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,128,128,1,0,8.7715
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,256,1,1,0,0.3564
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,256,16,1,0,2.2611
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,1024,1,0,34.3959
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,3072,1,0,57.5215
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,128,256,1,0,16.9927
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,256,32,1,0,4.6366
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,256,64,1,0,8.7727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,256,128,1,0,16.9763
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,128,512,1,0,34.2415
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,1536,1,0,58.2636
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,256,256,1,0,34.1216
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,16,1,0,0.3130
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,0,0.2650
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,32,1,0,0.3059
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1536,1,0,2.1021
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,64,1,0,0.3363
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,256,1,0,0.5512
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,2048,1,0,2.9953
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,128,1,0,0.4098
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,512,1,0,0.8518
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,3072,1,0,4.2552
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1024,1,0,1.4800
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,4096,1,0,5.5486
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,6144,1,0,8.3365
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,8192,1,0,11.1200
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,0,0.2732
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,16,1,0,0.3082
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,10240,1,0,13.9973
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,32,1,0,0.3359
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,12288,1,0,16.7609
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,64,1,0,0.4150
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,128,1,0,0.5505
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,256,1,0,0.8494
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,16384,1,0,23.0029
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1024,1,0,2.9760
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,512,1,0,1.7041
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1536,1,0,4.0460
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,2048,1,0,5.4347
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,3072,1,0,8.0165
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,4096,1,0,10.8692
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,32768,1,0,40.4692
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,6144,1,0,16.4049
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,0,0.2772
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,16,1,0,0.3326
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,8192,1,0,22.1720
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,64,1,0,0.5503
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,10240,1,0,27.3435
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,128,1,0,0.8494
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,32,1,0,0.4099
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,256,1,0,1.7081
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,12288,1,0,33.1712
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,512,1,0,2.7840
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,16384,1,0,37.4950
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1024,1,0,5.5453
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1536,1,0,8.0409
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,2048,1,0,10.6275
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,3072,1,0,16.0680
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,4096,1,0,21.5480
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,0,0.2847
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,16,1,0,0.4240
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,6144,1,0,32.2134
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,32768,1,0,80.3053
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,8192,1,0,35.8503
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,32,1,0,1.2639
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,64,1,0,0.8522
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,128,1,0,1.4718
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,10240,1,0,45.2503
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,256,1,0,2.7748
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,512,1,0,5.3842
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1024,1,0,11.2825
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,12288,1,0,55.8275
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1536,1,0,15.8216
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,2048,1,0,21.0528
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,16384,1,0,74.5103
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,0,0.3038
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,3072,1,0,31.4053
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,16,1,0,0.5530
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,4096,1,0,34.8708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,32,1,0,1.2097
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,64,1,0,1.4733
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,128,1,0,2.7783
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,256,1,0,5.4003
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,6144,1,0,53.9857
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,512,1,0,10.7082
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1024,1,0,21.0928
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,0,0.3037
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1536,1,0,31.0633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,8192,1,0,71.4006
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,2048,1,0,34.0201
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,16,1,0,1.2081
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,32,1,0,1.4711
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,64,1,0,2.7761
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,10240,1,0,90.1152
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,128,1,0,5.9036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,256,1,0,10.6873
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,3072,1,0,52.6845
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,512,1,0,20.9092
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,0,0.3315
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,16,1,0,1.7155
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,32,1,0,2.7746
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,4096,1,0,69.4319
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1024,1,0,33.7676
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,64,1,0,5.3865
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,128,1,0,10.6797
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1536,1,0,51.6668
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,256,1,0,20.8875
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,0,0.4127
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,16,1,0,2.9836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,2048,1,0,67.6708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,512,1,0,33.6532
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,32,1,0,5.3808
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,64,1,0,10.5198
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,0,0.5506
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,128,1,0,20.9900
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,3072,1,0,103.0433
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1024,1,0,67.1662
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,16,1,0,5.3751
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,32,1,0,10.6892
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,256,1,0,37.8109
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,0,0.2074
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,64,1,0,21.0220
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,16,1,0,0.2651
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,32,1,0,0.2555
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1536,1,0,101.1064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,64,1,0,0.2773
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,128,1,0,0.3323
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,256,1,0,0.4406
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,512,1,0,66.9751
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,128,1,0,33.5511
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,512,1,0,1.1977
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1024,1,0,1.1520
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1536,1,0,1.8621
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,2048,1,0,2.2138
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,3072,1,0,3.1286
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,4096,1,0,4.4187
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,6144,1,0,6.3772
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,8192,1,0,8.4228
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,10240,1,0,10.7356
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,0,0.2253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,12288,1,0,12.7606
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,16,1,0,0.2545
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,32,1,0,0.2776
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,64,1,0,0.3327
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,256,1,0,66.9624
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,128,1,0,0.4683
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,16384,1,0,17.4903
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,256,1,0,1.4711
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,512,1,0,1.1483
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1024,1,0,2.1249
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1536,1,0,3.2534
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,2048,1,0,4.0630
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,3072,1,0,6.0496
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,32768,1,0,37.4709
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,4096,1,0,8.3575
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,6144,1,0,12.4635
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1024,1,0,137.2431
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,8192,1,0,16.8755
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,16,1,0,0.2750
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,0,0.2232
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,10240,1,0,20.9660
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,32,1,0,0.3358
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,64,1,0,0.4403
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,128,1,0,1.1988
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,256,1,0,1.1440
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,12288,1,0,25.5390
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,512,1,0,2.3344
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1024,1,0,4.0342
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1536,1,0,5.9200
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,16384,1,0,34.4921
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,2048,1,0,8.1181
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,3072,1,0,12.1071
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,4096,1,0,16.1878
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,0,0.2357
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,6144,1,0,24.6391
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,16,1,0,0.3345
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,32,1,0,0.4416
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,64,1,0,1.1983
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,128,1,0,1.1460
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,256,1,0,2.3327
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,8192,1,0,32.7210
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,32768,1,0,67.0913
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,512,1,0,4.1928
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1024,1,0,8.0553
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,10240,1,0,41.5278
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1536,1,0,11.6856
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,2048,1,0,15.9378
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,12288,1,0,50.8027
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,3072,1,0,23.7936
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,0,0.2530
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,32,1,0,0.6611
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,16,1,0,1.1617
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,64,1,0,1.1480
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,4096,1,0,31.8955
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,128,1,0,2.1129
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,256,1,0,4.1919
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,512,1,0,7.8637
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1024,1,0,15.8303
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,6144,1,0,49.7861
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,0,0.2547
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,16,1,0,0.6614
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1536,1,0,23.4441
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,32,1,0,1.1518
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,64,1,0,2.3323
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,128,1,0,4.1753
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,2048,1,0,31.0117
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,256,1,0,7.8468
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,10240,1,0,73.5052
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,512,1,0,17.4405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,0,0.2751
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,3072,1,0,47.6969
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,16,1,0,1.1504
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,32,1,0,2.1143
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,64,1,0,4.1678
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1024,1,0,30.7717
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,128,1,0,7.8502
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,0,0.3267
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,256,1,0,15.6291
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,16,1,0,2.3276
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1536,1,0,46.7602
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,32,1,0,4.0028
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,64,1,0,7.8546
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,512,1,0,30.6715
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,128,1,0,15.5459
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,0,0.4406
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,16,1,0,4.0046
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,32,1,0,7.8245
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,256,1,0,30.6442
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,64,1,0,15.5321
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,3072,1,0,82.9849
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,0,0.1881
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1024,1,0,60.0163
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,32,1,0,0.2343
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,16,1,0,0.2455
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,64,1,0,0.2424
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,128,1,0,0.2862
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,256,1,0,0.3748
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,512,1,0,0.5322
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,128,1,0,30.6008
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1024,1,0,0.9036
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1536,1,0,1.3441
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,512,1,0,53.7730
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,2048,1,0,1.9253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,3072,1,0,2.4128
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,4096,1,0,3.2029
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,6144,1,0,4.7678
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,8192,1,0,6.3959
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,0,0.2057
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,10240,1,0,8.0618
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,12288,1,0,9.9890
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,16,1,0,0.2342
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,32,1,0,0.2444
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,64,1,0,0.2980
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,16384,1,0,13.6255
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,128,1,0,0.3768
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,256,1,0,0.5327
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,512,1,0,0.8975
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,256,1,0,53.6363
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1024,1,0,1.8661
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1536,1,0,2.3476
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,2048,1,0,3.0908
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,3072,1,0,4.6007
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,4096,1,0,6.3384
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,32768,1,0,29.8056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,6144,1,0,9.3489
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,0,0.2090
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,16,1,0,0.2394
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,8192,1,0,12.7959
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,10240,1,0,16.1305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,32,1,0,0.2904
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,64,1,0,0.3781
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,128,1,0,0.5306
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,12288,1,0,19.6057
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,256,1,0,0.8965
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,512,1,0,1.8551
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1024,1,0,3.0550
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1536,1,0,4.6253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,2048,1,0,6.0931
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,16384,1,0,26.8243
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,3072,1,0,9.0347
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,4096,1,0,12.5052
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,0,0.2099
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,6144,1,0,18.8518
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,16,1,0,0.2864
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,32,1,0,0.4051
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,64,1,0,0.5325
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,128,1,0,1.3819
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,256,1,0,1.6291
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,8192,1,0,25.1929
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,32768,1,0,58.9774
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,512,1,0,3.0430
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,10240,1,0,31.9004
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1024,1,0,6.0326
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1536,1,0,9.5775
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,12288,1,0,38.8268
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,2048,1,0,11.6394
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,3072,1,0,18.0226
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,0,0.2304
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,16384,1,0,53.1490
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,4096,1,0,24.2159
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,16,1,0,1.1561
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,32,1,0,0.5335
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,64,1,0,0.8967
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,128,1,0,1.6259
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,256,1,0,3.2116
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,512,1,0,5.8440
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,6144,1,0,37.0086
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1024,1,0,11.5227
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1536,1,0,17.5158
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,8192,1,0,50.0539
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,0,0.2311
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,2048,1,0,23.3291
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,16,1,0,0.5360
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,32,1,0,1.2081
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,64,1,0,1.6221
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,128,1,0,3.0328
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,10240,1,0,63.3135
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,256,1,0,5.8293
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,3072,1,0,35.7051
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,512,1,0,11.6971
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,4096,1,0,48.1136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,0,0.2401
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,16,1,0,1.2169
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1024,1,0,23.0738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,32,1,0,1.8645
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,64,1,0,3.2148
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,128,1,0,5.8035
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1536,1,0,34.7200
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,256,1,0,11.6504
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,6144,1,0,73.3127
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,0,0.2845
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,512,1,0,22.9654
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,16,1,0,1.6226
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,2048,1,0,49.5295
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,32,1,0,3.0339
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,64,1,0,5.9721
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,0,0.3769
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,128,1,0,11.5970
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,16,1,0,3.2116
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,3072,1,0,70.7269
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1024,1,0,45.9421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,32,1,0,5.7925
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,256,1,0,22.9662
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,64,1,0,11.6033
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1536,1,0,68.8095
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,128,1,0,22.8805
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,512,1,0,45.6552
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,256,1,0,45.6009
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,16,1,0,0.3221
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,1,1,0,0.2667
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,32,1,0,0.3315
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,64,1,0,0.3881
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,128,1,0,0.4260
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,256,1,0,0.5681
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,512,1,0,0.8897
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,1024,1,0,1.6358
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,1536,1,0,2.2149
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,2048,1,0,3.1508
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,3072,1,0,4.5455
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,4096,1,0,6.0360
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,6144,1,0,8.6263
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,8192,1,0,12.0911
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,1,1,0,0.2789
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,16,1,0,0.3348
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,10240,1,0,15.0635
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,32,1,0,0.3877
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,64,1,0,1.2125
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,128,1,0,0.5635
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,12288,1,0,17.7633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,256,1,0,0.8812
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,512,1,0,1.5362
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,16384,1,0,24.2623
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,1024,1,0,2.9060
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,1536,1,0,4.3627
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,2048,1,0,5.8578
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,3072,1,0,8.3521
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,4096,1,0,11.4958
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,1,32768,1,0,42.8228
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,6144,1,0,17.5025
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,16,1,0,0.3815
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,8192,1,0,23.1143
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,1,1,0,0.2874
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,32,1,0,0.4214
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,64,1,0,0.5629
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,10240,1,0,29.2877
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,128,1,0,1.4308
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,256,1,0,1.5202
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,12288,1,0,34.5229
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,512,1,0,2.8847
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,1024,1,0,5.7906
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,16384,1,0,39.3096
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,1536,1,0,8.1902
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,2048,1,0,11.2041
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,3072,1,0,16.6556
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,4096,1,0,22.4603
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,1,1,0,0.3069
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,16,1,0,0.4177
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,6144,1,0,34.3873
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,32,1,0,1.2094
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,8192,1,0,37.4537
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,2,32768,1,0,83.8565
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,64,1,0,0.8716
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,128,1,0,1.8879
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,10240,1,0,47.1978
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,256,1,0,3.1259
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,512,1,0,5.7607
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,1024,1,0,11.0934
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,12288,1,0,58.2310
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,1536,1,0,16.3666
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,2048,1,0,21.9307
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,4,16384,1,0,77.9438
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,3072,1,0,33.4950
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,4096,1,0,36.5178
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,16,1,1,0,0.3319
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,16,16,1,0,1.2233
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,16,32,1,0,0.9527
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,16,64,1,0,1.7578
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,16,128,1,0,3.0557
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,16,256,1,0,5.7528
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,6144,1,0,56.3980
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,16,512,1,0,11.2922
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,16,1024,1,0,21.8091
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,8192,1,0,74.4638
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,16,1536,1,0,32.1771
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,16,2048,1,0,35.5077
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,32,1,1,0,0.3471
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,10240,1,0,94.1354
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,32,16,1,0,1.2773
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,32,32,1,0,1.5192
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,32,64,1,0,2.8544
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,32,128,1,0,5.7420
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,16,3072,1,0,55.0440
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,8,12288,1,0,113.9873
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,32,256,1,0,11.0331
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,16,4096,1,0,72.7198
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,32,512,1,0,21.7393
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,64,1,1,0,1.1611
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,64,16,1,0,1.5259
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,64,32,1,0,2.8585
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,32,1024,1,0,35.2014
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,64,64,1,0,5.7080
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,64,128,1,0,12.4415
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,32,1536,1,0,53.9424
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,64,256,1,0,21.7107
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,128,1,1,0,1.1667
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,16,6144,1,0,110.5469
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,128,16,1,0,2.8640
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,32,2048,1,0,70.6873
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,64,512,1,0,35.0744
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,128,32,1,0,6.3347
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,128,64,1,0,11.0328
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,256,1,1,0,0.7250
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,128,128,1,0,21.7297
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,256,16,1,0,5.7170
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,256,32,1,0,12.2777
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,32,3072,1,0,107.7229
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,128,256,1,0,35.0454
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,1,1,0,0.2175
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,256,64,1,0,22.2972
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,16,1,0,0.2722
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,32,1,0,0.2856
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,64,1,0,0.3328
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,128,1,0,0.3446
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,256,1,0,1.2178
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,512,1,0,0.7031
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,1024,1,0,1.2578
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,1536,1,0,1.7837
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,2048,1,0,2.3523
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,3072,1,0,3.6365
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,256,128,1,0,35.0075
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,4096,1,0,4.7637
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,6144,1,0,6.8454
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,128,512,1,0,69.8895
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,8192,1,0,9.4218
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,1,1,0,0.2161
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,10240,1,0,11.6048
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,16,1,0,0.2818
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,12288,1,0,14.1492
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,32,1,0,0.3317
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,64,1,0,0.3446
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,128,1,0,0.4624
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,16384,1,0,19.3702
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,256,1,0,0.6949
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,512,1,0,1.2364
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,1024,1,0,2.2991
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,1536,1,0,3.3332
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,2048,1,0,4.4333
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,3072,1,0,6.7346
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,4096,1,0,8.8924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,1,32768,1,0,40.8218
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,6144,1,0,13.5304
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,128,64,2048,1,0,144.2644
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,1,1,0,0.2263
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,8192,1,0,18.2657
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,16,1,0,0.3300
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,32,1,0,0.3654
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,10240,1,0,22.7270
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,64,1,0,1.2183
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,256,1,0,1.2288
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,128,1,0,1.2292
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,12288,1,0,27.4990
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,512,1,0,2.2798
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,1024,1,0,4.5087
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,1536,1,0,6.4003
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,2048,1,0,8.7473
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,16384,1,0,37.2815
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,3072,1,0,13.0978
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,4096,1,0,17.4842
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,8,1,1,0,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,6144,1,0,26.3417
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,8,16,1,0,0.3449
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,8,32,1,0,0.4602
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,8,64,1,0,0.6895
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,8,128,1,0,1.4788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,8192,1,0,35.4062
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,8,256,1,0,2.2737
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,2,32768,1,0,72.5904
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,8,512,1,0,4.3273
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,8,1024,1,0,8.6785
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,10240,1,0,44.7065
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,8,1536,1,0,12.7721
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,8,2048,1,0,16.9515
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,12288,1,0,54.5999
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,16,1,1,0,0.2587
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,8,3072,1,0,25.9629
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,4,16384,1,0,66.2255
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,16,16,1,0,1.2242
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,16,32,1,0,0.6905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,16,64,1,0,1.2239
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,8,4096,1,0,34.3168
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,16,128,1,0,2.5193
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,16,256,1,0,4.4720
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,16,512,1,0,8.6094
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,8,6144,1,0,52.8920
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,16,1024,1,0,18.5815
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,16,1536,1,0,25.3531
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,8,8192,1,0,63.1898
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,32,1,1,0,0.2576
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,16,2048,1,0,33.2961
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,32,16,1,0,0.6918
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,32,32,1,0,1.2245
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,32,64,1,0,2.2609
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,32,128,1,0,4.3234
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,32,256,1,0,8.4582
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,16,3072,1,0,51.4751
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,32,512,1,0,16.7390
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,16,4096,1,0,61.3047
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,64,1,1,0,0.2885
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,8,12288,1,0,97.0698
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,64,16,1,0,1.4787
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,64,32,1,0,2.2621
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,32,1024,1,0,33.1774
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,64,64,1,0,4.4702
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,64,128,1,0,8.4306
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,128,1,1,0,0.3365
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,32,1536,1,0,50.3980
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,64,256,1,0,16.8788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,128,16,1,0,2.2550
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,16,6144,1,0,93.5118
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,32,2048,1,0,59.3775
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,128,32,1,0,4.4648
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,64,512,1,0,33.0734
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,128,64,1,0,8.4453
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,256,1,1,0,0.4577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,128,128,1,0,16.8724
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,256,16,1,0,4.4674
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,256,32,1,0,8.4304
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,64,1024,1,0,58.7924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,1,1,0,0.1848
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,128,256,1,0,33.1933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,256,64,1,0,16.6890
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,16,1,0,0.2459
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,32,1,0,0.2568
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,64,1,0,0.2925
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,128,1,0,0.2980
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,256,1,0,0.3946
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,512,1,0,0.5658
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,1024,1,0,1.2622
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,1536,1,0,1.4543
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,2048,1,0,1.7872
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,256,128,1,0,32.9428
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,3072,1,0,2.6291
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,4096,1,0,3.6618
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,128,512,1,0,58.6229
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,6144,1,0,5.3633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,8192,1,0,6.9355
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,1,1,0,0.2063
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,10240,1,0,8.8991
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,12288,1,0,10.6604
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,32,1,0,0.2858
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,16,1,0,0.2569
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,16384,1,0,14.6811
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,64,1,0,0.3001
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,256,1,0,0.5555
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,128,1,0,1.2048
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,512,1,0,0.9480
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,1024,1,0,1.7376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,256,256,1,0,66.7434
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,1536,1,0,2.5039
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,64,64,2048,1,0,119.9744
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,2048,1,0,3.2971
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,3072,1,0,5.0870
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,1,32768,1,0,31.8307
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,4096,1,0,6.7398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,6144,1,0,9.9931
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,1,1,0,0.2009
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,16,1,0,0.2932
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,32,1,0,0.2950
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,8192,1,0,13.6372
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,64,1,0,0.3863
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,128,1,0,0.6219
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,10240,1,0,17.1896
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,256,1,0,0.9389
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,512,1,0,1.7148
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,12288,1,0,21.0296
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,1024,1,0,3.2332
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,1536,1,0,4.7419
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,2048,1,0,6.2762
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,16384,1,0,28.4931
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,3072,1,0,9.7147
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,4096,1,0,12.9853
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,1,1,0,0.2083
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,16,1,0,0.2993
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,6144,1,0,19.8911
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,32,1,0,0.3877
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,64,1,0,0.5502
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,8192,1,0,26.4375
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,128,1,0,1.2805
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,256,1,0,1.7048
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,512,1,0,3.1998
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,2,32768,1,0,62.3281
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,10240,1,0,33.5226
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,1024,1,0,6.3370
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,1536,1,0,9.2489
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,12288,1,0,40.9792
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,2048,1,0,12.4555
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,3072,1,0,19.2836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,4,16384,1,0,55.9444
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,16,1,1,0,0.2287
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,4096,1,0,25.7338
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,16,16,1,0,0.3893
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,16,32,1,0,0.5485
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,16,64,1,0,0.9350
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,16,128,1,0,1.7014
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,16,256,1,0,3.3964
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,16,512,1,0,6.3267
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,6144,1,0,39.0453
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,16,1024,1,0,12.1503
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,8192,1,0,52.8517
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,16,1536,1,0,18.6921
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,32,1,1,0,0.2359
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,16,2048,1,0,24.6377
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,32,16,1,0,0.5519
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,32,32,1,0,0.9354
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,32,64,1,0,1.9314
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,10240,1,0,66.8632
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,32,128,1,0,3.3866
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,32,256,1,0,6.2794
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,16,3072,1,0,37.9112
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,32,512,1,0,12.0918
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,8,12288,1,0,81.3723
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,64,1,1,0,0.2629
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,16,4096,1,0,50.8851
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,64,16,1,0,1.2554
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,64,32,1,0,1.6995
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,64,64,1,0,3.1930
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,32,1024,1,0,27.3706
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,64,128,1,0,6.2600
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,128,1,1,0,0.3045
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,32,1536,1,0,36.7042
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,64,256,1,0,12.0859
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,128,16,1,0,2.2052
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,128,32,1,0,3.1769
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,64,512,1,0,24.3129
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,32,2048,1,0,48.8713
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,128,64,1,0,6.1258
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,128,128,1,0,12.2252
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,256,1,1,0,1.1688
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,256,16,1,0,3.1866
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,256,32,1,0,6.2771
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,128,256,1,0,24.1962
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,64,1024,1,0,48.4497
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,256,64,1,0,12.2205
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,1,1,0,0.1718
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,16,1,0,0.2323
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,32,1,0,0.2377
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,64,1,0,0.2731
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,128,1,0,0.2711
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,256,1,0,1.2050
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,256,128,1,0,23.9659
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,512,1,0,0.4979
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,1024,1,0,0.8201
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,1536,1,0,1.3935
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,128,512,1,0,48.1918
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,2048,1,0,1.4965
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,64,1536,1,0,72.6869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,3072,1,0,2.2097
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,4096,1,0,2.9241
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,6144,1,0,4.5198
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,8192,1,0,6.0091
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,10240,1,0,7.5292
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,1,1,0,0.1862
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,16,1,0,0.2402
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,12288,1,0,8.9846
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,32,1,0,0.2727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,256,256,1,0,48.4133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,64,1,0,0.2749
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,16384,1,0,12.3017
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,128,1,0,0.3512
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,256,1,0,1.4241
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,512,1,0,0.8023
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,1024,1,0,1.4474
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,1536,1,0,2.0878
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,2048,1,0,2.7359
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,3072,1,0,4.2707
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,1,32768,1,0,27.6223
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,4096,1,0,5.6456
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,32,128,1024,1,0,89.0627
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,6144,1,0,8.4850
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,1,1,0,0.1910
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,8192,1,0,11.4148
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,16,1,0,0.2712
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,32,1,0,0.2698
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,10240,1,0,14.2588
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,64,1,0,1.2046
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,12288,1,0,17.5601
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,256,1,0,0.7897
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,128,1,0,0.4881
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,512,1,0,1.4238
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,1024,1,0,2.8622
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,1536,1,0,3.9190
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,16384,1,0,24.2683
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,2048,1,0,5.3432
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,3072,1,0,7.9104
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,4096,1,0,10.7604
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,6144,1,0,16.3027
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,1,1,0,0.2058
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,16,1,0,0.2688
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,32,1,0,0.3534
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,64,1,0,0.4878
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,8192,1,0,22.5901
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,128,1,0,0.7878
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,256,1,0,1.4102
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,512,1,0,2.6409
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,10240,1,0,28.0156
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,2,32768,1,0,53.5062
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,1024,1,0,5.2340
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,12288,1,0,34.3150
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,1536,1,0,7.6055
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,2048,1,0,10.2506
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,3072,1,0,15.8321
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,4,16384,1,0,47.1491
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,16,1,1,0,0.2153
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,4096,1,0,21.0789
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,16,16,1,0,0.3521
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,16,32,1,0,0.4832
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,16,64,1,0,0.9205
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,16,128,1,0,1.4039
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,16,256,1,0,2.6355
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,6144,1,0,32.5253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,16,512,1,0,5.2020
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,16,1024,1,0,10.1029
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,8192,1,0,44.8580
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,16,1536,1,0,15.2305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,16,2048,1,0,20.0715
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,32,1,1,0,0.2249
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,10240,1,0,55.8998
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,32,16,1,0,1.5349
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,32,32,1,0,0.7873
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,32,64,1,0,1.4026
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,16,3072,1,0,30.9643
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,32,256,1,0,5.1892
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,32,128,1,0,2.6244
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,8,12288,1,0,68.3031
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,32,512,1,0,10.0343
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,16,4096,1,0,42.2384
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,64,1,1,0,0.2418
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,64,16,1,0,0.7875
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,64,32,1,0,1.4079
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,32,1024,1,0,19.9836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,64,64,1,0,3.3698
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,64,128,1,0,5.1659
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,64,256,1,0,10.0152
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,32,1536,1,0,29.8340
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,16,6144,1,0,64.7047
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,128,1,1,0,0.2827
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,128,16,1,0,1.4117
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,64,512,1,0,19.8845
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,32,2048,1,0,40.0481
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,128,32,1,0,2.6177
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,128,64,1,0,5.1769
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,256,1,1,0,0.3832
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,128,128,1,0,10.0069
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,256,16,1,0,3.4056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,64,1024,1,0,39.6286
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,256,32,1,0,5.1710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,128,256,1,0,19.5897
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,256,64,1,0,10.0053
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,1,1,0,0.1619
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,16,1,0,0.2164
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,64,1,0,0.2602
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,32,1,0,0.2233
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,256,128,1,0,19.7210
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,128,1,0,0.2744
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,32,4096,1,0,83.9105
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,256,1,0,0.3361
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,128,512,1,0,39.5078
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,512,1,0,0.4646
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,1024,1,0,0.7555
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,1536,1,0,1.2990
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,2048,1,0,1.3541
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,3072,1,0,1.9884
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,4096,1,0,2.6408
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,6144,1,0,3.9584
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,8192,1,0,5.4821
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,10240,1,0,6.9260
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,12288,1,0,8.3366
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,1,1,0,0.1708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,256,256,1,0,39.4289
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,16,1,0,0.2285
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,32,1,0,0.2635
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,16384,1,0,11.2033
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,64,1,0,0.2570
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,128,1,0,0.3319
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,512,1,0,0.7334
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,256,1,0,1.2177
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,1024,1,0,1.3016
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,1536,1,0,2.0867
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,2048,1,0,2.4498
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,3072,1,0,3.6967
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,16,128,1024,1,0,79.0003
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,1,32768,1,0,25.2229
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,4096,1,0,5.0949
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,6144,1,0,7.6854
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,1,1,0,0.1790
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,16,1,0,0.2666
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,8192,1,0,10.2969
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,10240,1,0,13.0536
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,32,1,0,0.2597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,64,1,0,0.3305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,128,1,0,0.4573
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,256,1,0,0.7237
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,12288,1,0,15.7549
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,512,1,0,1.5860
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,1024,1,0,2.3898
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,1536,1,0,3.5085
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,2048,1,0,4.7928
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,16384,1,0,21.8723
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,3072,1,0,7.2374
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,4096,1,0,9.5219
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,6144,1,0,14.7981
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,8,16,1,0,0.2596
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,8,1,1,0,0.1868
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,8192,1,0,19.9844
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,8,32,1,0,0.3322
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,8,64,1,0,1.1980
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,8,128,1,0,0.7222
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,10240,1,0,25.2411
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,8,256,1,0,1.2659
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,2,32768,1,0,49.0690
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,8,512,1,0,2.3605
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,8,1024,1,0,4.5336
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,12288,1,0,30.9673
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,8,1536,1,0,6.9670
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,8,2048,1,0,9.1456
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,8,3072,1,0,14.8245
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,16,1,1,0,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,4,16384,1,0,42.7886
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,16,16,1,0,1.2005
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,8,4096,1,0,19.0342
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,16,64,1,0,0.7196
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,16,32,1,0,0.4553
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,16,128,1,0,1.2637
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,16,256,1,0,2.5376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,16,512,1,0,4.6647
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,8,6144,1,0,29.0257
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,16,1024,1,0,8.9926
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,16,1536,1,0,13.4455
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,8,8192,1,0,39.6854
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,32,1,1,0,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,16,2048,1,0,18.0343
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,32,16,1,0,0.4559
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,32,32,1,0,0.7207
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,32,64,1,0,1.2634
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,32,128,1,0,2.3404
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,16,3072,1,0,27.8185
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,32,256,1,0,4.4936
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,32,512,1,0,8.9417
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,8,12288,1,0,61.6642
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,16,4096,1,0,37.8286
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,32,1024,1,0,17.5497
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,64,1,1,0,0.2371
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,64,16,1,0,0.7212
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,64,32,1,0,1.2580
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,64,64,1,0,2.5391
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,64,128,1,0,4.4594
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,32,1536,1,0,26.5390
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,64,256,1,0,8.9201
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,16,6144,1,0,58.0329
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,128,1,1,0,0.2647
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,32,2048,1,0,35.8259
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,128,16,1,0,1.2688
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,64,512,1,0,17.5852
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,128,32,1,0,2.3378
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,128,64,1,0,4.6362
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,256,1,1,0,0.3615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,128,128,1,0,8.7563
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,256,16,1,0,2.3318
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,32,3072,1,0,55.2852
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,64,1024,1,0,35.2956
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,256,32,1,0,4.6351
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,128,256,1,0,17.5696
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,256,64,1,0,8.7453
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,1,1,0,0.1627
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,16,1,0,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,32,1,0,0.2188
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,128,1,0,0.2542
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,64,1,0,0.2569
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,256,128,1,0,17.4898
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,256,1,0,0.3276
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,64,1536,1,0,53.0562
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,512,1,0,1.2143
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,128,512,1,0,35.0341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,1024,1,0,0.7235
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,1536,1,0,1.2717
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,2048,1,0,1.2871
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,3072,1,0,2.0978
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,4096,1,0,2.4900
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,6144,1,0,3.7464
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,8192,1,0,5.2049
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,1,1,0,0.1588
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,10240,1,0,6.3544
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,12288,1,0,7.9243
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,256,256,1,0,35.0123
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,16,1,0,0.2221
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,32,1,0,0.2492
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,16384,1,0,10.6781
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,64,1,0,0.2499
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,128,1,0,0.3267
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,256,1,0,0.4387
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,512,1,0,0.7027
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,1024,1,0,1.2402
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,1536,1,0,1.7664
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,2048,1,0,2.5296
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,3072,1,0,3.4739
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,1,32768,1,0,24.3081
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,4096,1,0,4.6440
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,6144,1,0,7.2591
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,1,1,0,0.1762
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,8192,1,0,9.6012
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,10240,1,0,12.3989
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,16,1,0,0.2584
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,32,1,0,0.2486
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,64,1,0,0.3238
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,8,256,512,1,0,69.5376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,12288,1,0,15.1232
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,128,1,0,0.5099
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,256,1,0,0.6942
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,512,1,0,1.2153
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,1024,1,0,2.2377
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,16384,1,0,20.6040
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,1536,1,0,3.4700
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,2048,1,0,4.5077
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,3072,1,0,6.6720
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,4096,1,0,9.1806
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,6144,1,0,13.8303
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,8,1,1,0,0.1901
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,8,16,1,0,0.2528
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,8192,1,0,18.9079
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,8,32,1,0,0.3206
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,8,64,1,0,1.2080
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,2,32768,1,0,46.9752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,10240,1,0,23.8818
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,8,128,1,0,0.6883
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,8,256,1,0,1.2019
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,8,512,1,0,2.4244
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,12288,1,0,29.2988
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,8,1024,1,0,4.2492
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,8,1536,1,0,6.5400
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,8,2048,1,0,8.4551
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,16384,1,0,40.4416
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,8,3072,1,0,13.4013
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,16,1,1,0,0.2100
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,16,32,1,0,0.4335
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,16,16,1,0,1.2110
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,8,4096,1,0,17.9677
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,16,64,1,0,0.6878
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,16,128,1,0,1.4677
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,16,256,1,0,2.1882
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,16,512,1,0,4.3776
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,8,6144,1,0,27.3903
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,16,1024,1,0,8.4610
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,16,1536,1,0,12.7961
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,8,8192,1,0,37.4246
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,32,1,1,0,0.2095
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,16,2048,1,0,16.9402
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,32,16,1,0,0.4376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,32,32,1,0,1.2373
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,32,64,1,0,1.1940
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,32,128,1,0,2.1869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,32,256,1,0,4.2112
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,16,3072,1,0,26.0319
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,4,32768,1,0,92.0606
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,32,512,1,0,8.4626
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,16,4096,1,0,35.5298
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,64,1,1,0,0.2316
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,64,16,1,0,0.6895
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,32,1024,1,0,16.5947
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,64,32,1,0,1.4429
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,64,64,1,0,2.3914
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,32,1536,1,0,25.0753
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,64,128,1,0,5.2912
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,16,6144,1,0,54.7483
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,64,256,1,0,8.3927
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,128,1,1,0,0.2568
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,32,2048,1,0,33.5330
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,128,16,1,0,1.4823
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,64,512,1,0,16.5751
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,128,32,1,0,2.1824
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,128,64,1,0,4.1853
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,256,1,1,0,0.3536
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,128,128,1,0,8.1917
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,64,1024,1,0,32.9037
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,256,16,1,0,2.1942
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,128,256,1,0,16.5188
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,256,32,1,0,5.3423
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,256,64,1,0,8.2136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,32,4096,1,0,70.6671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,1,1,0,0.1421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,16,1,0,0.2012
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,64,1536,1,0,49.6785
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,128,512,1,0,32.6631
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,256,128,1,0,16.4150
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,32,1,0,0.2140
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,64,1,0,0.2487
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,128,1,0,0.2506
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,256,1,0,0.3204
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,512,1,0,0.4410
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,1024,1,0,0.7060
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,1536,1,0,0.9701
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,2048,1,0,1.5086
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,3072,1,0,1.8368
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,4096,1,0,2.4175
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,6144,1,0,3.8207
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,8192,1,0,4.8791
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,10240,1,0,6.1788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,256,256,1,0,32.7135
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,1,1,0,0.1527
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,12288,1,0,7.7190
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,16,1,0,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,16384,1,0,10.3992
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,32,1,0,0.2485
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,64,1,0,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,128,1,0,0.3179
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,256,1,0,0.4969
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,4,128,1024,1,0,65.7067
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,512,1,0,1.2648
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,1024,1,0,1.2084
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,1536,1,0,1.7192
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,2048,1,0,2.2398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,3072,1,0,3.3667
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,4096,1,0,4.5044
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,1,32768,1,0,23.7780
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,6144,1,0,7.0524
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,1,1,0,0.1630
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,8192,1,0,9.5138
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,16,1,0,0.2561
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,10240,1,0,12.0542
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,32,1,0,0.2575
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,64,1,0,0.3163
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,12288,1,0,14.5394
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,256,1,0,0.6737
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,128,1,0,1.4803
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,512,1,0,1.3933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,1024,1,0,2.1609
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,16384,1,0,20.2666
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,1536,1,0,3.3686
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,2048,1,0,4.3628
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,3072,1,0,6.4710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,4096,1,0,8.7018
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,6144,1,0,13.5953
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,1,1,0,0.1768
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,8192,1,0,18.3730
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,16,1,0,0.2469
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,32,1,0,0.3152
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,2,32768,1,0,45.7680
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,128,1,0,0.6701
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,64,1,0,1.2139
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,256,1,0,1.1718
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,10240,1,0,23.4311
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,512,1,0,2.1353
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,1024,1,0,4.2889
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,12288,1,0,28.8830
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,1536,1,0,6.1722
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,2048,1,0,8.1738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,3072,1,0,12.8331
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,16384,1,0,39.3807
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,4096,1,0,17.4401
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,16,1,1,0,0.2028
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,16,16,1,0,0.3167
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,16,32,1,0,0.4267
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,16,64,1,0,0.6692
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,16,128,1,0,1.1685
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,6144,1,0,26.7887
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,16,256,1,0,2.1169
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,16,512,1,0,4.2199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,8192,1,0,36.3115
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,16,1024,1,0,8.1907
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,16,1536,1,0,12.3885
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,16,2048,1,0,16.4425
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,10240,1,0,46.3462
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,32,1,1,0,0.2054
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,4,32768,1,0,89.9583
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,32,16,1,0,0.4270
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,8,12288,1,0,56.7153
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,32,32,1,0,1.2368
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,32,64,1,0,1.1660
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,16,3072,1,0,25.3544
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,32,128,1,0,2.1251
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,32,256,1,0,4.0755
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,32,512,1,0,8.1942
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,16,4096,1,0,36.0224
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,32,1024,1,0,15.9188
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,64,1,1,0,0.2236
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,64,16,1,0,1.4253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,64,32,1,0,1.1680
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,32,1536,1,0,24.0952
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,16,6144,1,0,53.1069
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,64,64,1,0,2.3314
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,64,128,1,0,4.0405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,32,2048,1,0,32.4212
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,64,256,1,0,7.9571
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,128,1,1,0,0.2502
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,16,8192,1,0,72.3428
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,128,16,1,0,1.1684
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,128,32,1,0,2.1175
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,64,512,1,0,15.9986
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,128,64,1,0,4.0460
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,32,3072,1,0,50.4168
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,256,1,1,0,0.3489
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,128,128,1,0,7.9217
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,256,16,1,0,2.1257
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,64,1024,1,0,31.8495
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,256,32,1,0,4.0402
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,128,256,1,0,15.9598
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,256,64,1,0,7.9382
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,1,1,0,0.1460
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,16,1,0,0.2026
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,32,4096,1,0,68.5496
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,32,1,0,0.2133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,64,1,0,0.2405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,128,1,0,0.2403
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,256,128,1,0,15.6879
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,256,1,0,1.2000
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,512,1,0,1.2123
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,128,512,1,0,31.6214
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,1024,1,0,0.6967
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,1536,1,0,0.9562
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,2048,1,0,1.6747
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,3072,1,0,2.0400
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,4096,1,0,2.3942
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,6144,1,0,3.5909
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,8192,1,0,4.9791
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,10240,1,0,6.2664
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,256,256,1,0,31.6035
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,1,1,0,0.1511
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,16,1,0,0.2114
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,12288,1,0,7.6849
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,32,1,0,0.2510
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,16384,1,0,10.4597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,64,1,0,0.2539
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,128,1,0,0.3154
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,256,1,0,0.4257
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,512,1,0,1.2678
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,1024,1,0,1.1916
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,1536,1,0,1.6937
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,2048,1,0,2.1965
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,3072,1,0,3.4991
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,1,32768,1,0,23.3407
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,4096,1,0,4.4380
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,6144,1,0,6.9583
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,1,1,0,0.1633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,8192,1,0,9.3592
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,2,256,512,1,0,63.2496
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,10240,1,0,11.8722
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,16,1,0,0.2413
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,32,1,0,0.2523
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,12288,1,0,14.3431
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,64,1,0,0.3161
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,128,1,0,0.4230
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,256,1,0,0.6645
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,512,1,0,1.1727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,1024,1,0,2.3467
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,16384,1,0,19.9933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,1536,1,0,3.1420
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,2048,1,0,4.1494
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,3072,1,0,6.5451
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,4096,1,0,8.7339
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,6144,1,0,13.2115
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,1,1,0,0.1811
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,8192,1,0,18.0935
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,16,1,0,0.2448
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,32,1,0,0.3143
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,2,32768,1,0,45.2776
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,64,1,0,0.4224
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,128,1,0,0.6629
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,10240,1,0,22.9033
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,256,1,0,1.4105
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,512,1,0,2.1077
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,12288,1,0,28.1666
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,1024,1,0,4.2104
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,1536,1,0,6.2256
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,2048,1,0,8.2117
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,3072,1,0,12.6147
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,16384,1,0,38.8987
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,4096,1,0,16.9834
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,16,1,1,0,0.1952
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,16,16,1,0,0.3111
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,16,64,1,0,0.6629
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,16,32,1,0,1.4833
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,16,128,1,0,1.4196
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,16,256,1,0,2.0968
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,6144,1,0,26.4057
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,16,512,1,0,4.1557
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,16,1024,1,0,7.8882
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,8192,1,0,35.8421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,16,1536,1,0,12.0251
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,16,2048,1,0,16.1724
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,10240,1,0,45.6135
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,32,1,1,0,0.2013
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,32,16,1,0,0.4239
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,4,32768,1,0,88.8185
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,16,3072,1,0,25.0025
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,32,32,1,0,1.2321
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,32,64,1,0,1.1504
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,32,128,1,0,2.0817
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,32,256,1,0,5.1782
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,32,512,1,0,8.0542
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,16,4096,1,0,35.5789
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,32,1024,1,0,15.8288
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,8,16384,1,0,77.4307
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,64,1,1,0,0.2196
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,64,16,1,0,0.6658
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,32,1536,1,0,23.9059
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,16,6144,1,0,52.3596
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,64,32,1,0,1.1543
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,64,64,1,0,2.0891
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,64,128,1,0,4.1248
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,64,256,1,0,7.9852
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,32,2048,1,0,31.9626
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,128,1,1,0,0.2470
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,128,16,1,0,1.1531
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,64,512,1,0,15.5263
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,16,8192,1,0,71.3564
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,128,32,1,0,2.8761
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,128,64,1,0,4.1368
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,128,128,1,0,7.9484
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,256,1,1,0,0.3437
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,32,3072,1,0,49.6732
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,64,1024,1,0,31.1398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,256,16,1,0,2.0925
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,256,32,1,0,4.1345
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,128,256,1,0,15.6188
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,256,64,1,0,7.9593
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,64,1536,1,0,47.2783
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,256,128,1,0,15.6346
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,128,512,1,0,31.1387
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,float16,1,256,256,1,0,30.8318
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,0,0.2510
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,32,1,0,0.3015
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,64,1,0,0.3287
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,16,1,0,0.3154
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,128,1,0,0.4123
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,256,1,0,2.0540
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1024,1,0,1.5025
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,512,1,0,2.0737
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,2048,1,0,3.1944
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1536,1,0,2.1506
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,4096,1,0,6.1964
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,0,0.3049
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,16,1,0,0.3135
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,32,1,0,0.3468
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,64,1,0,0.4419
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,128,1,0,0.6112
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,256,1,0,0.9486
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,3072,1,0,4.1924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,6144,1,0,8.1946
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,512,1,0,1.6746
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1024,1,0,2.8250
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1536,1,0,4.0688
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,8192,1,0,11.1839
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,2048,1,0,6.0722
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,3072,1,0,8.1428
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,16384,1,0,25.7440
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,32768,1,0,40.2637
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,4096,1,0,10.7071
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,10240,1,0,13.7981
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,6144,1,0,16.3335
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,12288,1,0,16.7900
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,0,0.2572
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,8192,1,0,24.4429
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,16,1,0,0.3347
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,32,1,0,0.4153
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,64,1,0,0.5937
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,10240,1,0,27.7567
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,128,1,0,0.9486
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,256,1,0,2.1250
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,512,1,0,3.1336
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,12288,1,0,37.6998
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1024,1,0,5.3238
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,16384,1,0,37.2693
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1536,1,0,8.9824
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,2048,1,0,10.7135
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,3072,1,0,15.9934
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,4096,1,0,21.3273
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,0,0.2748
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,16,1,0,0.4152
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,32,1,0,2.0262
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,6144,1,0,36.7528
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,8192,1,0,36.2681
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,64,1,0,0.8675
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,128,1,0,2.1700
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,10240,1,0,44.9179
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,256,1,0,3.1708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,512,1,0,5.3128
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1024,1,0,10.5572
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,12288,1,0,63.1259
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1536,1,0,15.7271
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,2048,1,0,23.9840
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,3072,1,0,31.3201
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,0,0.2957
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,4096,1,0,35.6969
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,16,1,0,0.5699
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,32,1,0,0.9549
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,32768,1,0,79.7651
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,64,1,0,2.1254
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,128,1,0,3.1674
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,256,1,0,5.5509
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,6144,1,0,53.7078
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,512,1,0,10.3911
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1024,1,0,20.7207
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1536,1,0,30.8365
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,16384,1,0,73.9218
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,0,0.3005
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,16,1,0,0.8642
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,2048,1,0,40.1838
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,32,1,0,2.1413
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,64,1,0,3.1261
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,128,1,0,5.2826
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,3072,1,0,51.1851
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,256,1,0,12.1544
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,512,1,0,20.6365
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1024,1,0,33.6891
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,0,0.3357
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,16,1,0,2.1379
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,32,1,0,3.1280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1536,1,0,50.2533
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,64,1,0,5.5511
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,10240,1,0,89.4266
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,128,1,0,10.5806
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,4096,1,0,68.8788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,256,1,0,20.6131
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,0,0.4151
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,512,1,0,33.6104
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,16,1,0,2.7762
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,32,1,0,5.2864
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,64,1,0,10.5718
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,6144,1,0,106.3727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1024,1,0,66.6409
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,128,1,0,20.3310
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,0,1.9719
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,2048,1,0,85.8517
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,16,1,0,5.2808
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,256,1,0,33.4670
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,32,1,0,10.3173
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,64,1,0,20.6328
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,3072,1,0,102.0155
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1536,1,0,100.1538
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,128,1,0,33.3552
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,256,1,0,66.7035
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1024,1,0,133.1507
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,512,1,0,0.6899
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,16,1,0,0.2769
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1536,1,0,1.6875
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,32,1,0,0.2708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,64,1,0,0.2915
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,128,1,0,0.3608
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,2048,1,0,2.1821
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,256,1,0,0.4733
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,4096,1,0,4.5088
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,0,0.2869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1024,1,0,1.2611
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,0,0.2531
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,16,1,0,0.2724
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,32,1,0,0.2916
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,64,1,0,0.3713
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,3072,1,0,3.1963
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,128,1,0,0.4707
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,6144,1,0,6.3077
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,256,1,0,0.7492
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,8192,1,0,8.7044
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,10240,1,0,10.8774
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,512,1,0,1.1875
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1024,1,0,2.1661
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1536,1,0,3.1354
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,12288,1,0,13.0967
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,2048,1,0,4.3969
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,3072,1,0,6.1422
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,4096,1,0,8.4581
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,16384,1,0,17.7290
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,6144,1,0,12.6447
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,0,0.2441
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,8192,1,0,16.8709
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,16,1,0,0.2929
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,32,1,0,0.3552
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,10240,1,0,21.2297
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,64,1,0,0.4711
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,128,1,0,0.6875
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,12288,1,0,25.7538
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,256,1,0,1.1787
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,512,1,0,2.1519
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1024,1,0,4.3861
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,16384,1,0,34.6918
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,32768,1,0,37.9455
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1536,1,0,6.0083
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,2048,1,0,8.2169
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,3072,1,0,12.3277
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,4096,1,0,16.4262
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,0,0.2493
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,6144,1,0,24.5418
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,16,1,0,0.3593
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,32768,1,0,67.7475
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,32,1,0,2.0405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,64,1,0,0.7593
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,128,1,0,1.1814
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,8192,1,0,33.0692
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,256,1,0,2.5383
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,512,1,0,4.3540
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,10240,1,0,41.9708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1024,1,0,7.9321
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,12288,1,0,50.8247
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1536,1,0,12.0251
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,2048,1,0,15.7509
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,16384,1,0,61.8755
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,3072,1,0,24.1444
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,0,0.2768
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,16,1,0,0.4720
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,32,1,0,0.6874
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,64,1,0,2.0976
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,4096,1,0,32.4180
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,128,1,0,2.1563
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,256,1,0,4.3552
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,512,1,0,8.1300
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,6144,1,0,48.9533
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1024,1,0,15.8464
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,8192,1,0,59.5885
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1536,1,0,23.6550
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,0,0.2755
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,2048,1,0,31.4005
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,16,1,0,2.0364
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,10240,1,0,74.3203
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,32,1,0,1.1828
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,64,1,0,2.6093
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,128,1,0,4.0426
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,3072,1,0,47.7772
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,256,1,0,8.1011
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,4096,1,0,56.8400
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,512,1,0,15.8461
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,0,0.2895
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,16384,1,0,123.1068
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1024,1,0,31.2066
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,16,1,0,2.1100
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,32,1,0,2.1544
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,6144,1,0,86.5008
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,64,1,0,4.3413
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,128,1,0,7.8741
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1536,1,0,46.7704
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,256,1,0,15.7957
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,0,0.3519
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,2048,1,0,55.0005
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,16,1,0,2.1549
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,32,1,0,4.3424
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,512,1,0,31.0918
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,8192,1,0,117.0489
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,64,1,0,8.0869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,128,1,0,15.7684
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,0,0.4718
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,16,1,0,4.3310
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1024,1,0,54.6700
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,32,1,0,7.8684
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,256,1,0,31.1644
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,64,1,0,15.7335
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1536,1,0,81.9183
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,512,1,0,54.3905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,128,1,0,31.0358
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,2048,1,0,109.5753
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,256,1,0,54.3582
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1024,1,0,120.9156
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,16,1,0,0.2711
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,32,1,0,0.2520
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,64,1,0,0.2668
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1024,1,0,0.9383
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,128,1,0,0.3215
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,4096,1,0,3.3510
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,256,1,0,0.4145
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,512,1,0,0.5675
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,0,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1536,1,0,1.3263
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,2048,1,0,1.7013
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,0,0.2359
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,3072,1,0,2.4949
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,16,1,0,0.2513
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,6144,1,0,5.1684
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,32,1,0,0.2674
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,64,1,0,0.3155
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,8192,1,0,6.5793
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,128,1,0,2.0428
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,256,1,0,0.5662
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,512,1,0,0.9378
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1024,1,0,2.1659
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1536,1,0,2.5972
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,10240,1,0,8.2897
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,2048,1,0,3.5148
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,3072,1,0,4.7348
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,12288,1,0,10.0559
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,4096,1,0,6.3435
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,6144,1,0,9.8417
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,16384,1,0,14.0474
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,8192,1,0,13.2176
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,0,0.2322
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,10240,1,0,16.4024
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,32,1,0,0.3166
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,16,1,0,0.2656
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,12288,1,0,19.9001
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,64,1,0,2.0474
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,128,1,0,0.5700
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,256,1,0,0.9276
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,512,1,0,2.1906
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1024,1,0,3.4744
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,16384,1,0,27.4710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1536,1,0,4.6070
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,2048,1,0,6.1069
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,3072,1,0,9.4827
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,32768,1,0,30.5307
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,4096,1,0,12.7407
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,0,0.2362
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,6144,1,0,19.2449
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,16,1,0,0.3154
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,32768,1,0,60.2227
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,8192,1,0,25.7079
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,64,1,0,0.5703
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,32,1,0,2.0392
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,10240,1,0,32.7615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,128,1,0,2.0905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,256,1,0,1.6745
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,512,1,0,3.5523
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,12288,1,0,39.7555
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1024,1,0,6.2637
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1536,1,0,9.2728
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,2048,1,0,12.0317
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,3072,1,0,18.3218
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,16384,1,0,54.3689
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,4096,1,0,25.0029
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,0,0.2489
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,16,1,0,0.4102
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,32,1,0,2.0596
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,64,1,0,0.9362
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,128,1,0,1.6765
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,6144,1,0,37.9389
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,256,1,0,3.1136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,512,1,0,6.2452
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1024,1,0,12.1494
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,8192,1,0,51.2905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1536,1,0,18.0852
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,10240,1,0,64.7953
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,2048,1,0,24.0579
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,0,0.2568
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,16,1,0,0.5716
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,32,1,0,0.9372
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,12288,1,0,78.9667
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,3072,1,0,36.5843
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,64,1,0,1.6779
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,128,1,0,3.1061
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,256,1,0,6.0060
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,512,1,0,12.1248
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,4096,1,0,49.4518
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,16384,1,0,101.6475
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,0,0.2670
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1024,1,0,23.8256
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,16,1,0,2.0940
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,32,1,0,1.6736
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,64,1,0,3.1133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,128,1,0,6.2194
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1536,1,0,35.6370
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,6144,1,0,75.3059
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,256,1,0,12.0771
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,0,0.3141
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,2048,1,0,47.5027
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,16,1,0,1.6790
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,32,1,0,3.4590
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,512,1,0,23.7799
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,64,1,0,6.2226
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,128,1,0,12.0682
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,0,2.0055
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,16,1,0,3.4379
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,32,1,0,6.2615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,256,1,0,23.6998
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1024,1,0,47.0924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,64,1,0,12.0662
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,128,1,0,23.6724
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,512,1,0,47.0305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1536,1,0,77.2351
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,256,1,0,46.9945
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,16,1,0,0.3173
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,32,1,0,0.3305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,1,1,0,0.3275
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,64,1,0,0.3822
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,128,1,0,0.4417
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,256,1,0,2.0560
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,512,1,0,0.9005
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,1024,1,0,1.5785
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,1536,1,0,2.2727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,2048,1,0,3.3227
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,3072,1,0,4.3487
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,4096,1,0,6.0156
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,6144,1,0,8.6139
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,1,1,0,0.3004
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,8192,1,0,11.9125
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,16,1,0,0.3280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,10240,1,0,14.6618
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,12288,1,0,17.6442
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,32,1,0,0.3767
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,64,1,0,0.4280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,128,1,0,0.5873
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,256,1,0,0.8917
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,16384,1,0,24.3001
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,512,1,0,2.6487
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,1024,1,0,2.9003
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,1536,1,0,4.2426
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,2048,1,0,5.8438
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,3072,1,0,8.3487
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,4096,1,0,11.3619
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,1,32768,1,0,42.6504
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,6144,1,0,17.3273
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,1,1,0,0.2926
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,16,1,0,0.3791
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,8192,1,0,22.6776
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,64,1,0,0.5810
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,32,1,0,0.4255
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,128,1,0,0.8875
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,10240,1,0,29.1656
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,256,1,0,2.1627
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,12288,1,0,34.3117
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,512,1,0,2.8780
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,1024,1,0,5.7088
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,16384,1,0,39.2731
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,1536,1,0,8.3966
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,2048,1,0,11.2297
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,3072,1,0,16.5467
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,4096,1,0,22.0421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,8,1,1,0,0.3142
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,8,16,1,0,0.4258
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,6144,1,0,33.1746
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,8,32,1,0,2.0631
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,8192,1,0,37.3005
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,8,64,1,0,0.8851
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,2,32768,1,0,83.3962
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,8,128,1,0,1.5411
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,10240,1,0,47.1024
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,8,256,1,0,3.2292
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,8,512,1,0,5.6717
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,8,1024,1,0,10.9590
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,12288,1,0,56.6827
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,8,1536,1,0,16.2381
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,8,2048,1,0,21.4855
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,4,16384,1,0,77.0328
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,16,1,1,0,0.3213
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,8,3072,1,0,32.6832
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,8,4096,1,0,36.3444
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,16,16,1,0,0.5826
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,16,32,1,0,0.8896
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,16,64,1,0,1.5428
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,16,128,1,0,3.2170
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,16,256,1,0,5.8070
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,8,6144,1,0,55.0081
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,16,512,1,0,10.9284
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,16,1024,1,0,21.9136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,8,8192,1,0,74.0334
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,16,1536,1,0,31.9567
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,16,2048,1,0,35.4231
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,32,1,1,0,0.3379
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,32,16,1,0,0.8868
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,32,32,1,0,2.1855
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,8,10240,1,0,93.3267
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,32,64,1,0,2.8631
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,32,128,1,0,5.6589
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,16,3072,1,0,53.5428
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,32,256,1,0,10.9353
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,32,512,1,0,21.3011
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,16,4096,1,0,72.2164
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,64,1,1,0,0.3922
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,32,1024,1,0,35.0213
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,64,16,1,0,1.5442
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,64,32,1,0,3.2193
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,64,64,1,0,5.8105
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,64,128,1,0,10.9086
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,32,1536,1,0,52.5264
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,16,6144,1,0,109.5823
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,64,256,1,0,21.8425
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,128,1,1,0,1.9940
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,128,16,1,0,2.8674
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,64,512,1,0,34.9924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,128,32,1,0,5.6474
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,128,64,1,0,10.9151
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,32,2048,1,0,88.8807
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,256,1,1,0,0.7468
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,16,8192,1,0,147.8934
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,128,128,1,0,21.8242
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,256,16,1,0,5.7221
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,64,1024,1,0,69.5493
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,128,256,1,0,34.9862
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,256,32,1,0,10.8920
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,1,1,0,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,256,64,1,0,21.8323
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,32,1,0,0.2949
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,16,1,0,0.2892
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,64,1,0,0.3486
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,128,1,0,0.3720
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,256,1,0,2.0817
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,512,1,0,0.7303
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,1024,1,0,1.2961
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,256,128,1,0,34.8407
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,1536,1,0,1.8430
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,128,512,1,0,69.3666
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,2048,1,0,2.7725
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,3072,1,0,3.5181
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,64,1536,1,0,104.5444
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,4096,1,0,4.9794
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,6144,1,0,6.9461
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,1,1,0,0.2608
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,8192,1,0,9.5232
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,10240,1,0,12.0011
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,16,1,0,0.3026
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,12288,1,0,14.3434
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,32,1,0,0.3475
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,64,1,0,0.3699
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,256,1,0,0.7247
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,128,1,0,2.0946
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,16384,1,0,19.1790
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,128,256,256,1,0,69.5105
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,512,1,0,1.2756
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,1024,1,0,2.3390
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,1536,1,0,3.7595
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,2048,1,0,4.4388
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,3072,1,0,6.9108
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,4096,1,0,8.9272
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,1,32768,1,0,41.2602
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,6144,1,0,13.6619
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,1,1,0,0.2418
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,8192,1,0,18.3536
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,16,1,0,0.3415
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,32,1,0,0.3681
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,64,1,0,0.4891
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,10240,1,0,23.0213
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,128,1,0,0.7211
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,256,1,0,2.1615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,12288,1,0,27.7807
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,512,1,0,2.8888
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,1024,1,0,4.3813
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,1536,1,0,6.4986
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,2048,1,0,9.2161
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,2,16384,1,0,37.4645
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,3072,1,0,13.2621
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,4096,1,0,17.7461
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,8,1,1,0,0.2468
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,6144,1,0,26.6532
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,8,16,1,0,0.3698
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,8,32,1,0,2.0687
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,8,64,1,0,0.7196
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,8,128,1,0,1.2591
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,8192,1,0,35.9361
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,8,256,1,0,2.6908
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,8,512,1,0,4.3463
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,10240,1,0,45.1519
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,8,1024,1,0,8.7594
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,8,1536,1,0,12.9747
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,12288,1,0,54.6237
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,8,2048,1,0,17.9870
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,16384,1,0,66.8728
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,8,3072,1,0,26.1018
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,16,1,1,0,0.2773
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,16,16,1,0,2.0822
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,16,32,1,0,0.7181
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,8,4096,1,0,34.9364
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,16,64,1,0,1.2605
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,16,128,1,0,2.3053
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,16,256,1,0,5.1385
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,8,6144,1,0,52.8070
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,16,512,1,0,8.4884
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,16,1024,1,0,17.0112
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,8,8192,1,0,63.9146
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,16,1536,1,0,25.5064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,32,1,1,0,0.2795
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,16,2048,1,0,33.9828
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,32,16,1,0,2.0632
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,32,32,1,0,1.2630
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,8,10240,1,0,80.7785
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,32,64,1,0,2.2963
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,4,32768,1,0,144.7530
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,32,128,1,0,4.6407
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,32,256,1,0,8.4885
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,16,3072,1,0,51.4763
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,32,512,1,0,16.9858
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,64,1,1,0,0.3014
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,16,4096,1,0,62.0910
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,64,16,1,0,1.2642
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,64,32,1,0,2.2982
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,64,64,1,0,4.3345
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,32,1024,1,0,36.6676
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,64,128,1,0,8.6876
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,32,1536,1,0,50.3322
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,64,256,1,0,16.9562
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,32,2048,1,0,60.0959
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,128,1,1,0,0.3577
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,128,16,1,0,2.2973
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,64,512,1,0,33.4961
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,128,32,1,0,4.3215
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,128,64,1,0,8.7034
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,128,128,1,0,16.9233
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,32,3072,1,0,91.8202
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,256,1,1,0,0.4885
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,64,1024,1,0,59.6238
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,256,16,1,0,4.6337
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,128,256,1,0,33.2765
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,256,32,1,0,8.6949
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,256,64,1,0,16.9464
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,32,4096,1,0,123.6164
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,16,1,0,0.2712
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,64,1536,1,0,89.2741
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,128,512,1,0,59.2407
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,32,1,0,0.2787
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,64,1,0,0.3139
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,128,1,0,0.3284
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,256,128,1,0,37.8829
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,256,1,0,0.4291
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,512,1,0,0.5990
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,1024,1,0,1.0045
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,1536,1,0,1.4335
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,2048,1,0,1.8426
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,3072,1,0,3.0499
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,4096,1,0,3.5584
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,6144,1,0,5.3228
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,8192,1,0,7.3569
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,1,1,0,0.2877
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,10240,1,0,9.2305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,256,256,1,0,59.1380
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,1,1,0,0.2391
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,16,1,0,0.2820
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,12288,1,0,11.1335
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,16384,1,0,15.1465
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,32,1,0,0.3162
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,64,1,0,0.3271
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,256,1,0,0.5921
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,128,1,0,2.0999
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,512,1,0,2.3469
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,1024,1,0,1.7936
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,1536,1,0,2.5905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,2048,1,0,3.8286
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,3072,1,0,5.0494
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,1,32768,1,0,32.5076
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,4096,1,0,6.9728
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,6144,1,0,10.4480
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,8192,1,0,14.0844
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,1,1,0,0.2347
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,16,1,0,0.3152
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,10240,1,0,17.6939
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,32,1,0,0.3299
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,64,1,0,2.0882
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,128,1,0,0.5878
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,12288,1,0,21.4480
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,256,1,0,0.9736
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,512,1,0,1.7717
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,64,256,512,1,0,118.1586
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,1024,1,0,3.6610
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,1536,1,0,5.5045
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,16384,1,0,28.9756
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,2048,1,0,6.6749
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,3072,1,0,10.0520
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,4096,1,0,13.4228
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,6144,1,0,20.3028
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,1,1,0,0.2357
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,16,1,0,0.3305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,8192,1,0,27.0959
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,32,1,0,0.4199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,64,1,0,0.5854
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,2,32768,1,0,63.5349
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,128,1,0,2.1325
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,256,1,0,1.7569
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,10240,1,0,34.5924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,512,1,0,3.2776
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,1024,1,0,6.3674
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,12288,1,0,41.9063
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,1536,1,0,9.7486
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,2048,1,0,13.7538
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,3072,1,0,19.6833
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,4,16384,1,0,57.1637
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,1,1,0,0.2571
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,16,1,0,0.4202
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,4096,1,0,27.2264
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,32,1,0,0.5870
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,64,1,0,0.9709
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,128,1,0,1.7501
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,256,1,0,3.6006
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,6144,1,0,40.1025
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,512,1,0,6.5460
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,1024,1,0,12.7564
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,8192,1,0,54.1370
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,1536,1,0,19.0772
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,1,1,0,0.2584
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,2048,1,0,27.0014
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,10240,1,0,68.4940
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,16,1,0,0.5891
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,32,1,0,2.1243
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,64,1,0,1.7600
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,3072,1,0,38.7647
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,128,1,0,3.2646
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,12288,1,0,83.2972
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,256,1,0,7.5597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,4096,1,0,52.2646
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,512,1,0,12.4601
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,1,1,0,0.2884
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,16,1,0,2.1448
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,1024,1,0,25.1024
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,32,1,0,1.7496
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,64,1,0,3.5854
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,6144,1,0,79.6419
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,128,1,0,6.3003
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,1536,1,0,37.6398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,256,1,0,12.6664
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,2048,1,0,50.3318
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,128,1,1,0,0.3364
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,128,16,1,0,1.7537
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,128,32,1,0,3.5929
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,8192,1,0,100.4479
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,512,1,0,28.7821
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,128,64,1,0,6.5153
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,256,1,1,0,0.4505
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,128,128,1,0,12.6614
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,256,16,1,0,4.1565
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,1024,1,0,49.7213
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,256,32,1,0,6.5104
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,128,256,1,0,25.1023
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,256,64,1,0,12.6655
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,4096,1,0,96.7243
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,256,128,1,0,24.9352
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,1536,1,0,74.6114
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,128,512,1,0,49.2976
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,256,256,1,0,49.4346
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,1,1,0,0.2706
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,16,1,0,0.2589
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,32,1,0,0.2606
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,64,1,0,0.2929
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,128,1,0,0.3167
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,256,1,0,0.3935
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,512,1,0,0.5417
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,1024,1,0,0.8661
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,1536,1,0,3.6499
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,2048,1,0,1.5611
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,3072,1,0,2.3037
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,4096,1,0,3.0365
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,1,1,0,0.2236
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,16,1,0,0.2611
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,16384,1,0,12.9962
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,32,1,0,0.2923
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,64,1,0,0.3018
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,6144,1,0,4.5162
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,8192,1,0,6.0685
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,128,1,0,0.3899
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,512,1,0,0.8471
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,256,1,0,2.1045
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,10240,1,0,7.9068
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,1024,1,0,1.5135
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,1536,1,0,2.1922
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,32768,1,0,28.5020
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,2048,1,0,2.8542
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,3072,1,0,4.2559
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,4096,1,0,5.9597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,6144,1,0,8.8564
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,8192,1,0,11.9189
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,1,1,0,0.2250
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,10240,1,0,14.7630
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,12288,1,0,9.5139
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,16,1,0,0.2992
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,32,1,0,0.3067
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,64,1,0,0.3884
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,12288,1,0,17.9790
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,128,1,0,0.5287
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,256,1,0,0.8352
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,512,1,0,2.4759
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,16384,1,0,24.9730
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,1024,1,0,3.2459
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,1536,1,0,4.0681
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,2048,1,0,5.3915
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,3072,1,0,8.1986
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,4096,1,0,11.2788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,6144,1,0,17.5092
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,1,1,0,0.2223
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,16,1,0,0.3021
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,8192,1,0,23.1095
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,32768,1,0,55.0682
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,32,1,0,2.0821
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,10240,1,0,29.1782
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,64,1,0,2.3284
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,128,1,0,0.8319
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,256,1,0,2.1970
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,512,1,0,2.7469
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,12288,1,0,35.5325
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,1024,1,0,6.3792
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,1536,1,0,8.9192
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,2048,1,0,10.7719
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,16384,1,0,48.7339
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,3072,1,0,16.5156
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,4096,1,0,22.1706
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,1,1,0,0.2449
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,16,1,0,2.1040
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,32,1,0,2.0849
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,64,1,0,0.8317
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,6144,1,0,33.7542
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,128,1,0,2.1968
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,256,1,0,3.0721
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,512,1,0,5.2487
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,8192,1,0,45.6058
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,1024,1,0,10.5973
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,1536,1,0,15.9147
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,10240,1,0,57.7810
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,2048,1,0,21.1735
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,32768,1,0,108.2882
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,1,1,0,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,16,1,0,2.1052
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,32,1,0,2.1141
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,64,1,0,1.4784
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,12288,1,0,70.5625
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,128,1,0,3.0891
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,3072,1,0,32.4062
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,256,1,0,5.6511
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,512,1,0,10.3206
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,1,1,0,0.2695
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,4096,1,0,43.7976
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,16,1,0,2.1331
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,32,1,0,1.4778
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,1024,1,0,20.8824
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,64,1,0,2.7277
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,128,1,0,5.2396
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,1536,1,0,31.2405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,256,1,0,10.5090
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,6144,1,0,66.9793
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,128,1,1,0,0.3081
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,128,16,1,0,1.4740
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,512,1,0,20.7830
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,2048,1,0,41.5746
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,128,32,1,0,2.7169
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,128,64,1,0,5.2292
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,256,1,1,0,2.0688
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,128,128,1,0,10.5290
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,256,16,1,0,2.7280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,1024,1,0,41.2849
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,256,32,1,0,5.5674
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,128,256,1,0,20.6585
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,256,64,1,0,10.5133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,256,128,1,0,20.6713
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,128,512,1,0,40.8085
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,256,256,1,0,40.9225
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,2048,1,0,82.9920
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,1,1,0,0.2385
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,16,1,0,0.2507
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,32,1,0,0.2526
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,64,1,0,0.2924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,128,1,0,0.2907
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,256,1,0,0.3792
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,512,1,0,0.5098
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,1024,1,0,0.8071
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,1536,1,0,1.1914
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,2048,1,0,1.4216
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,3072,1,0,2.4617
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,4096,1,0,3.0901
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,1,1,0,0.2137
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,16,1,0,0.2609
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,6144,1,0,4.1205
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,32,1,0,0.2874
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,64,1,0,0.2948
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,8192,1,0,5.5379
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,128,1,0,0.3708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,32768,1,0,26.3339
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,256,1,0,2.0963
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,512,1,0,2.1178
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,1024,1,0,1.3765
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,1536,1,0,1.9657
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,2048,1,0,2.5556
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,3072,1,0,4.1409
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,4096,1,0,5.1615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,10240,1,0,6.9876
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,6144,1,0,8.0656
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,12288,1,0,8.4952
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,8192,1,0,10.8648
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,1,1,0,0.2077
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,10240,1,0,13.6512
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,16,1,0,0.2845
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,32,1,0,0.2878
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,64,1,0,0.3653
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,12288,1,0,16.3749
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,128,1,0,0.4977
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,256,1,0,0.7734
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,512,1,0,1.3465
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,16384,1,0,23.0506
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,16384,1,0,11.9252
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,1024,1,0,2.4979
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,1536,1,0,3.6758
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,2048,1,0,4.8691
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,3072,1,0,7.3934
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,4096,1,0,10.2071
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,6144,1,0,15.4813
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,1,1,0,0.2236
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,16,1,0,0.2874
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,8192,1,0,20.9410
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,32768,1,0,50.8622
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,32,1,0,0.3703
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,64,1,0,0.4933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,128,1,0,2.1099
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,10240,1,0,26.5591
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,256,1,0,1.3368
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,12288,1,0,32.0631
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,512,1,0,2.8386
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,1024,1,0,4.7640
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,1536,1,0,7.3150
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,2048,1,0,9.7119
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,16384,1,0,44.5280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,3072,1,0,14.8823
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,1,1,0,0.2451
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,4096,1,0,20.0516
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,16,1,0,0.3688
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,32,1,0,0.4964
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,64,1,0,0.7685
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,128,1,0,1.3341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,256,1,0,2.8096
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,6144,1,0,30.2765
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,512,1,0,4.7245
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,1024,1,0,9.5212
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,8192,1,0,42.2615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,1536,1,0,14.0215
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,1,1,0,0.2360
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,2048,1,0,19.0423
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,16,1,0,0.4949
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,10240,1,0,52.5666
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,32,1,0,0.7717
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,64,1,0,1.3281
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,128,1,0,2.8285
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,3072,1,0,28.9258
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,256,1,0,4.7294
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,512,1,0,9.2537
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,4096,1,0,39.5892
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,1024,1,0,18.6546
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,1,1,0,0.2595
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,16,1,0,0.7715
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,16384,1,0,88.2582
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,32,1,0,1.3288
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,1536,1,0,28.1208
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,64,1,0,2.4516
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,128,1,0,4.9431
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,2048,1,0,37.3237
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,256,1,0,9.4647
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,128,1,1,0,0.2943
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,128,16,1,0,1.3288
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,512,1,0,18.5870
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,8192,1,0,82.2477
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,128,32,1,0,2.4495
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,3072,1,0,57.9033
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,128,64,1,0,4.9518
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,128,128,1,0,9.4481
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,256,1,1,0,0.4020
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,1024,1,0,36.9654
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,256,16,1,0,2.4591
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,128,256,1,0,18.5458
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,4096,1,0,78.5014
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,256,32,1,0,4.9506
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,256,64,1,0,9.4617
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,256,128,1,0,18.4685
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,128,512,1,0,36.8357
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,2048,1,0,74.5388
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,256,256,1,0,36.7360
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,1,1,0,0.1758
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,16,1,0,0.2404
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,256,512,1,0,72.8465
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,32,1,0,0.2498
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,128,1,0,0.2887
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,64,1,0,0.2807
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,256,1,0,0.3717
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,512,1,0,0.4954
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,1024,1,0,0.7741
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,1536,1,0,1.0692
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,2048,1,0,1.3583
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,3072,1,0,2.3509
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,12288,1,0,8.3229
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,1,1,0,0.2124
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,16384,1,0,11.1832
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,4096,1,0,2.9562
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,16,1,0,0.2499
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,6144,1,0,3.9084
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,32,1,0,0.2895
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,64,1,0,0.2836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,128,1,0,2.0834
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,256,1,0,0.4839
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,32768,1,0,25.3031
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,512,1,0,2.1130
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,1024,1,0,1.3156
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,1536,1,0,1.8637
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,8192,1,0,5.2662
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,2048,1,0,2.8045
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,3072,1,0,3.9697
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,10240,1,0,6.6572
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,4096,1,0,4.8991
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,6144,1,0,7.6643
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,8192,1,0,10.3772
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,1,1,0,0.2133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,16,1,0,0.2791
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,10240,1,0,13.0474
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,32,1,0,0.2814
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,64,1,0,0.3619
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,12288,1,0,15.5904
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,128,1,0,0.4802
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,256,1,0,2.1215
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,512,1,0,1.2835
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,1024,1,0,2.3583
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,16384,1,0,21.8220
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,1536,1,0,3.7644
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,2048,1,0,4.5893
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,3072,1,0,7.0019
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,4096,1,0,9.7083
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,6144,1,0,14.7162
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,1,1,0,0.2078
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,16,1,0,0.2842
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,8192,1,0,19.9029
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,32,1,0,0.3576
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,32768,1,0,48.7710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,64,1,0,2.0869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,128,1,0,0.7398
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,10240,1,0,25.2703
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,256,1,0,1.2745
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,512,1,0,2.3445
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,12288,1,0,30.6923
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,1024,1,0,4.7890
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,1536,1,0,7.0056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,2048,1,0,9.1901
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,16384,1,0,42.3900
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,3072,1,0,14.0931
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,4096,1,0,18.7392
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,1,1,0,0.2316
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,16,1,0,2.0848
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,32,1,0,0.4806
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,64,1,0,0.7374
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,6144,1,0,28.9102
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,128,1,0,2.1805
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,256,1,0,2.3221
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,512,1,0,4.4531
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,8192,1,0,39.3248
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,1024,1,0,9.0053
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,1536,1,0,13.2461
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,10240,1,0,49.8890
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,32768,1,0,95.5902
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,2048,1,0,17.9639
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,1,1,0,0.2415
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,16,1,0,0.4806
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,32,1,0,0.7429
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,64,1,0,2.1743
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,128,1,0,2.3078
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,12288,1,0,61.0827
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,3072,1,0,27.5958
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,256,1,0,4.7845
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,512,1,0,8.7539
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,4096,1,0,37.4915
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,1,1,0,0.2650
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,16,1,0,0.7388
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,1024,1,0,17.6728
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,32,1,0,2.1619
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,64,1,0,2.3150
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,128,1,0,4.4256
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,1536,1,0,26.4373
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,256,1,0,8.9598
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,2048,1,0,35.4376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,128,1,1,0,0.2873
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,512,1,0,17.2986
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,128,16,1,0,2.1560
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,128,32,1,0,2.3042
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,128,64,1,0,4.7624
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,8192,1,0,78.1247
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,3072,1,0,54.7717
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,128,128,1,0,8.9245
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,256,1,1,0,0.3887
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,1024,1,0,34.9167
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,256,16,1,0,3.2367
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,128,256,1,0,17.3301
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,256,32,1,0,4.4355
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,256,64,1,0,8.9494
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,256,128,1,0,17.4611
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,128,512,1,0,34.5932
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,256,256,1,0,34.5311
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,256,512,1,0,68.8500
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,1,1,0,0.1711
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,32,1,0,0.2547
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,16,1,0,0.2375
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,64,1,0,0.2776
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,128,1,0,0.2794
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,256,1,0,0.3635
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,512,1,0,0.4867
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,1536,1,0,1.0458
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,1024,1,0,2.1272
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,2048,1,0,2.1783
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,3072,1,0,1.9462
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,4096,1,0,2.5424
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,1,1,0,0.2037
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,12288,1,0,7.8943
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,16,1,0,0.2497
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,6144,1,0,3.8096
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,16384,1,0,11.1505
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,32,1,0,0.2772
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,64,1,0,0.2753
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,8192,1,0,5.4097
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,128,1,0,0.3621
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,256,1,0,0.4780
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,1024,1,0,1.2796
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,512,1,0,2.2305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,1536,1,0,1.8215
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,32768,1,0,24.8526
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,2048,1,0,2.3622
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,3072,1,0,3.8399
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,10240,1,0,6.4859
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,4096,1,0,4.7620
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,6144,1,0,7.2295
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,8192,1,0,10.0795
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,1,1,0,0.1962
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,16,1,0,0.2769
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,10240,1,0,12.7004
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,32,1,0,0.2773
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,64,1,0,0.3566
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,12288,1,0,15.2066
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,128,1,0,0.4714
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,256,1,0,2.1173
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,512,1,0,1.2548
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,16384,1,0,21.2984
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,1024,1,0,2.2901
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,1536,1,0,3.3641
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,2048,1,0,4.8649
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,3072,1,0,6.7998
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,4096,1,0,9.4699
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,6144,1,0,14.3146
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,1,1,0,0.2198
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,32768,1,0,47.6284
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,8192,1,0,19.4193
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,16,1,0,0.2808
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,32,1,0,0.3532
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,10240,1,0,24.6042
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,64,1,0,0.5469
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,128,1,0,2.2941
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,256,1,0,1.2390
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,12288,1,0,29.7334
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,512,1,0,3.0765
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,1024,1,0,4.3635
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,1536,1,0,6.7421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,16384,1,0,41.3409
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,2048,1,0,8.9334
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,3072,1,0,13.6707
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,4096,1,0,18.2286
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,1,1,0,0.2394
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,16,1,0,0.3536
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,32,1,0,0.4726
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,6144,1,0,28.2259
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,64,1,0,0.7223
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,128,1,0,1.2393
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,256,1,0,2.2529
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,512,1,0,4.5829
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,8192,1,0,38.2849
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,1024,1,0,10.3021
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,1536,1,0,12.8549
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,10240,1,0,48.5787
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,2048,1,0,17.4410
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,32768,1,0,93.5135
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,1,1,0,0.2300
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,16,1,0,0.4731
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,32,1,0,0.7238
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,64,1,0,1.2376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,128,1,0,2.6064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,12288,1,0,59.4963
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,3072,1,0,26.7721
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,256,1,0,4.6038
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,512,1,0,8.4837
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,4096,1,0,36.4472
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,1,1,0,0.2585
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,16,1,0,0.7308
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,1024,1,0,17.1556
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,32,1,0,1.2419
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,64,1,0,2.2393
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,128,1,0,4.2949
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,1536,1,0,25.7291
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,256,1,0,8.7170
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,2048,1,0,34.3771
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,128,1,1,0,0.2855
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,512,1,0,17.0426
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,128,16,1,0,1.2416
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,128,32,1,0,2.8636
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,8192,1,0,76.0509
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,128,64,1,0,4.3059
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,3072,1,0,53.1718
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,128,128,1,0,8.6609
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,256,1,1,0,0.3872
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,1024,1,0,33.7649
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,256,16,1,0,2.6208
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,128,256,1,0,21.1703
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,256,32,1,0,4.2968
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,256,64,1,0,8.4333
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,1536,1,0,50.9475
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,128,512,1,0,33.5713
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,256,128,1,0,16.8979
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,256,256,1,0,33.4913
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,256,512,1,0,66.6705
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,1,1,0,0.1685
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,16,1,0,0.2357
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,32,1,0,0.2445
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,64,1,0,0.2882
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,128,1,0,0.2743
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,256,1,0,0.3568
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,512,1,0,0.4805
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,1024,1,0,2.1100
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,1536,1,0,1.0342
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,2048,1,0,1.3960
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,3072,1,0,2.2736
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,4096,1,0,2.5285
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,1,1,0,0.1951
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,6144,1,0,4.0787
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,12288,1,0,8.0408
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,16,1,0,0.2525
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,8192,1,0,5.0632
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,16384,1,0,10.7796
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,32,1,0,0.2720
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,64,1,0,0.2840
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,128,1,0,0.3541
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,256,1,0,2.0742
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,512,1,0,0.7307
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,1024,1,0,1.2640
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,1536,1,0,1.8025
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,2048,1,0,2.3399
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,3072,1,0,3.8084
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,32768,1,0,24.6420
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,4096,1,0,4.9803
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,10240,1,0,6.6477
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,6144,1,0,7.1269
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,8192,1,0,9.6972
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,1,1,0,0.2016
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,16,1,0,0.2830
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,10240,1,0,12.5275
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,32,1,0,0.2721
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,12288,1,0,15.0153
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,64,1,0,0.3569
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,128,1,0,2.1367
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,256,1,0,0.8566
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,512,1,0,1.2406
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,1024,1,0,2.2596
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,16384,1,0,20.9991
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,1536,1,0,3.3254
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,2048,1,0,4.3941
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,3072,1,0,6.9278
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,4096,1,0,9.3175
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,6144,1,0,14.1089
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,1,1,0,0.2156
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,32768,1,0,47.2510
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,8192,1,0,18.8798
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,16,1,0,0.2785
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,32,1,0,0.3551
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,64,1,0,0.4661
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,10240,1,0,24.2720
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,128,1,0,0.7227
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,256,1,0,1.2241
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,512,1,0,2.5910
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,12288,1,0,29.5603
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,1024,1,0,4.5739
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,1536,1,0,6.4038
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,2048,1,0,8.5505
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,16384,1,0,40.8543
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,3072,1,0,13.5033
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,1,1,0,0.2248
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,4096,1,0,18.1505
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,16,1,0,0.3513
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,32,1,0,0.4662
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,64,1,0,0.7169
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,6144,1,0,27.7750
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,128,1,0,1.2183
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,256,1,0,2.2189
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,512,1,0,4.2505
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,1024,1,0,8.3764
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,8192,1,0,37.8226
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,1536,1,0,12.8832
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,10240,1,0,47.9518
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,1,1,0,0.2257
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,2048,1,0,17.1167
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,16,1,0,0.4680
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,32,1,0,2.1013
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,3072,1,0,26.4462
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,64,1,0,2.1539
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,128,1,0,2.2113
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,12288,1,0,58.7432
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,256,1,0,5.5598
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,4096,1,0,35.9481
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,512,1,0,10.4719
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,1,1,0,0.2464
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,1024,1,0,16.9003
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,16,1,0,2.0976
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,32,1,0,1.2225
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,1536,1,0,25.3546
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,6144,1,0,55.1588
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,64,1,0,2.5661
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,128,1,0,4.5092
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,2048,1,0,33.8116
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,256,1,0,8.5684
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,1,1,0,0.2897
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,16,1,0,1.2250
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,32,1,0,2.2174
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,512,1,0,16.4930
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,3072,1,0,52.3207
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,64,1,0,4.4991
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,128,1,0,8.5253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,256,1,1,0,0.3886
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,256,1,0,16.4558
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,1024,1,0,39.4506
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,256,16,1,0,2.2234
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,4096,1,0,71.1322
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,256,32,1,0,4.4925
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,0,0.2316
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,256,64,1,0,8.3041
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,512,1,0,33.0763
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,16,1,0,0.2854
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,32,1,0,0.2738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,256,128,1,0,16.6414
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,64,1,0,0.2953
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,128,1,0,0.3644
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,2048,1,0,67.1340
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,256,1,0,0.5151
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,512,1,0,0.7659
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1024,1,0,1.3328
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1536,1,0,1.8754
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,2048,1,0,2.4625
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,256,256,1,0,33.0625
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,3072,1,0,3.9693
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,4096,1,0,4.8885
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,6144,1,0,7.5570
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,1024,1,0,65.9024
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,8192,1,0,9.8294
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,10240,1,0,12.4732
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,0,0.2342
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,16,1,0,0.2731
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,32,1,0,0.2936
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,12288,1,0,15.0437
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,64,1,0,0.3611
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,128,1,0,0.4905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,256,1,0,0.8286
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,512,1,0,1.3209
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,16384,1,0,20.3962
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,256,512,1,0,65.7941
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1024,1,0,2.9274
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1536,1,0,3.6101
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,2048,1,0,4.7830
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,3072,1,0,7.3440
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,4096,1,0,9.7439
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,32768,1,0,36.0573
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,6144,1,0,14.6180
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,0,0.2375
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,16,1,0,0.2939
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,8192,1,0,19.3788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,32,1,0,0.3623
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,64,1,0,2.0726
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,128,1,0,0.7638
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,10240,1,0,24.5656
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,256,1,0,1.3270
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,512,1,0,2.4317
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,12288,1,0,29.6382
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1024,1,0,5.0097
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1536,1,0,6.9855
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,16384,1,0,33.0421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,2048,1,0,9.5309
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,3072,1,0,14.0691
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,4096,1,0,19.0486
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,0,0.2485
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,16,1,0,0.3614
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,32,1,0,2.0987
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,6144,1,0,28.5627
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,8192,1,0,31.4235
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,64,1,0,2.1142
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,128,1,0,2.1785
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,32768,1,0,71.3507
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,256,1,0,2.6562
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,10240,1,0,39.6928
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,512,1,0,4.9772
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1024,1,0,9.4364
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,12288,1,0,48.0950
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1536,1,0,14.0256
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,2048,1,0,18.6101
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,3072,1,0,27.9050
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,4096,1,0,30.4377
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,0,0.2730
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,16,1,0,0.5192
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,32,1,0,0.7670
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,64,1,0,1.3231
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,128,1,0,2.4323
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,6144,1,0,46.1836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,256,1,0,5.0034
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,512,1,0,10.0939
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1024,1,0,18.4722
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,8192,1,0,62.3913
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1536,1,0,27.5710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,2048,1,0,29.5185
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,10240,1,0,78.9751
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,0,0.2770
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,16,1,0,0.7702
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,32,1,0,2.2181
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,32768,1,0,142.5311
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,3072,1,0,44.9633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,64,1,0,2.4286
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,12288,1,0,95.9699
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,128,1,0,4.7121
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,256,1,0,9.3926
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,0,0.2915
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,512,1,0,18.2551
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,4096,1,0,60.5392
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,16,1,0,2.2221
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,32,1,0,2.4219
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,64,1,0,4.7236
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1024,1,0,29.2940
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,128,1,0,9.1810
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1536,1,0,47.1795
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,0,0.3590
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,256,1,0,18.4122
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,6144,1,0,92.2480
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,16,1,0,2.4310
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,32,1,0,4.7164
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,2048,1,0,58.7788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,512,1,0,29.2055
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,64,1,0,9.1937
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,0,2.2347
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,128,1,0,18.3625
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,16,1,0,4.7163
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,32,1,0,10.3063
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,256,1,0,29.1911
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,0,0.1970
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1024,1,0,58.2881
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,16,1,0,0.2554
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,32,1,0,0.2455
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,64,1,0,18.3836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,64,1,0,0.2691
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,128,1,0,0.3136
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,256,1,0,2.1081
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,512,1,0,0.6151
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1024,1,0,1.0687
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1536,1,0,1.5058
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,2048,1,0,1.9564
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,3072,1,0,3.2183
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,128,1,0,29.1281
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,4096,1,0,4.0924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,6144,1,0,5.7212
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,8192,1,0,7.9138
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,512,1,0,58.0606
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,0,0.2157
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,10240,1,0,9.7390
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,16,1,0,0.2527
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,12288,1,0,11.9131
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,32,1,0,0.2628
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,64,1,0,0.3147
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,128,1,0,0.4153
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,256,1,0,0.6139
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,16384,1,0,16.1870
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,512,1,0,1.0656
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1024,1,0,2.5399
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1536,1,0,2.8133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,2048,1,0,4.0009
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,3072,1,0,5.5500
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,4096,1,0,7.6035
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,32768,1,0,34.9181
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,6144,1,0,11.4439
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,0,0.2121
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,8192,1,0,15.3830
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,16,1,0,0.2652
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,32,1,0,0.3158
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,64,1,0,0.4392
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,10240,1,0,19.3245
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,128,1,0,0.6130
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,256,1,0,2.1888
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,512,1,0,1.9323
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,12288,1,0,23.2325
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1024,1,0,3.6621
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1536,1,0,5.4068
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,16384,1,0,31.9009
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,2048,1,0,7.3968
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,3072,1,0,11.1434
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,4096,1,0,14.8598
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,0,0.2225
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,6144,1,0,22.2886
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,16,1,0,0.3193
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,32,1,0,0.4133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,64,1,0,2.1009
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,8192,1,0,30.4827
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,128,1,0,1.0667
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,32768,1,0,61.8069
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,256,1,0,1.9320
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,512,1,0,3.9586
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,10240,1,0,38.2217
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1024,1,0,7.1342
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1536,1,0,10.6868
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,2048,1,0,14.4318
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,12288,1,0,46.2808
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,3072,1,0,21.7919
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,16384,1,0,55.9438
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,0,0.2434
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,16,1,0,0.4154
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,32,1,0,0.6142
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,4096,1,0,29.3507
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,64,1,0,1.0644
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,128,1,0,1.9235
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,256,1,0,3.9382
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,512,1,0,7.3530
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,6144,1,0,44.4601
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1024,1,0,14.3604
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,8192,1,0,52.8406
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1536,1,0,21.3377
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,0,0.2498
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,2048,1,0,28.3962
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,16,1,0,2.3900
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,32,1,0,1.0657
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,64,1,0,1.9291
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,128,1,0,3.9349
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,10240,1,0,67.7226
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,256,1,0,7.3249
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,3072,1,0,43.1481
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,512,1,0,14.2502
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,0,0.2633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,4096,1,0,50.8948
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,16,1,0,2.1954
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,32,1,0,1.9253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1024,1,0,28.0167
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,64,1,0,3.6253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,128,1,0,7.2885
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1536,1,0,42.2688
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,256,1,0,14.2367
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,6144,1,0,77.7696
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,0,0.3162
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,2048,1,0,49.1699
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,16,1,0,1.9250
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,32,1,0,3.9262
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,512,1,0,27.9429
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,64,1,0,7.0733
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,0,2.0774
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,128,1,0,14.2239
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,16,1,0,3.6265
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,3072,1,0,75.0703
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1024,1,0,48.7277
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,32,1,0,7.2889
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,256,1,0,31.9821
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,0,0.1718
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,64,1,0,14.2072
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,16,1,0,0.2216
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,32,1,0,0.2180
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,64,1,0,0.2374
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,128,1,0,0.2823
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,256,1,0,0.3881
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,512,1,0,0.5109
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1024,1,0,0.8484
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,512,1,0,48.4333
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,128,1,0,27.9806
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1536,1,0,2.1889
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,2048,1,0,2.2982
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,3072,1,0,2.2760
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,4096,1,0,3.0582
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,6144,1,0,4.7556
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,8192,1,0,6.2685
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,10240,1,0,7.6188
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,12288,1,0,9.5059
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,0,0.1891
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,256,1,0,48.3405
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,16384,1,0,12.7638
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,16,1,0,0.2271
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,32,1,0,0.2340
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,64,1,0,0.2826
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,128,1,0,0.3632
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,256,1,0,2.1550
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,512,1,0,0.9424
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1024,1,0,1.5338
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1536,1,0,2.5422
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,2048,1,0,3.2066
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,32768,1,0,28.5003
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,3072,1,0,4.3150
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,4096,1,0,6.0631
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1024,1,0,96.9990
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,6144,1,0,8.8134
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,0,0.1995
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,8192,1,0,12.1336
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,16,1,0,0.2384
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,32,1,0,0.2867
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,10240,1,0,15.3038
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,64,1,0,2.1430
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,12288,1,0,18.5736
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,128,1,0,0.5113
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,256,1,0,0.8399
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,512,1,0,1.5283
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1024,1,0,3.1532
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1536,1,0,4.4836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,16384,1,0,25.4760
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,2048,1,0,5.9174
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,3072,1,0,8.6876
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,4096,1,0,11.4613
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,6144,1,0,17.6177
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,0,0.2075
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,16,1,0,0.2830
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,32,1,0,0.3679
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,64,1,0,0.5096
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,128,1,0,0.8432
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,8192,1,0,23.9188
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,256,1,0,1.5256
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,32768,1,0,56.1156
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,512,1,0,2.8308
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,10240,1,0,30.1714
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1024,1,0,5.8928
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,12288,1,0,36.7123
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1536,1,0,8.2461
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,2048,1,0,11.2029
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,3072,1,0,16.9896
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,16384,1,0,50.2875
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,0,0.2238
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,4096,1,0,22.8752
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,16,1,0,0.3668
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,32,1,0,0.5120
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,64,1,0,2.1887
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,128,1,0,1.5177
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,256,1,0,2.8339
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,6144,1,0,34.7904
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,512,1,0,5.4657
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1024,1,0,11.0939
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,8192,1,0,47.1536
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1536,1,0,16.4988
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,0,0.2138
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,2048,1,0,21.9512
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,16,1,0,0.5127
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,10240,1,0,59.8046
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,32,1,0,0.8418
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,64,1,0,1.5190
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,128,1,0,2.8116
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,3072,1,0,33.4988
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,256,1,0,5.7224
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,12288,1,0,73.6768
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,512,1,0,10.8438
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,4096,1,0,45.2485
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,0,0.2367
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1024,1,0,21.7087
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,16,1,0,0.8489
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,32,1,0,1.5191
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,64,1,0,2.8228
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,128,1,0,5.4465
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1536,1,0,32.5391
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,256,1,0,11.0018
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,6144,1,0,69.2382
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,2048,1,0,43.4443
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,0,0.2780
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,16,1,0,2.2619
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,32,1,0,2.8138
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,512,1,0,21.4908
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,64,1,0,5.7096
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,128,1,0,10.9630
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,3072,1,0,66.4866
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,0,2.0774
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,16,1,0,2.8152
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1024,1,0,42.9962
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,256,1,0,21.7446
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,32,1,0,5.4332
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,64,1,0,10.9727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,4096,1,0,82.8138
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,128,1,0,21.5432
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,512,1,0,42.9080
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,256,1,0,42.8064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,1,1,0,0.2422
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,32,1,0,0.3014
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,64,1,0,0.3427
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,128,1,0,0.3720
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,256,1,0,0.5062
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,16,1,0,0.2935
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,512,1,0,2.2003
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,1024,1,0,1.3923
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,1536,1,0,2.3746
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,2048,1,0,2.9557
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,3072,1,0,4.1919
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,4096,1,0,5.3875
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,6144,1,0,7.9479
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,1,1,0,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,8192,1,0,10.7623
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,16,1,0,0.2999
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,10240,1,0,13.2093
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,32,1,0,0.3421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,64,1,0,0.3737
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,12288,1,0,15.9376
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,128,1,0,0.5034
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,256,1,0,0.7941
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,512,1,0,2.2753
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,16384,1,0,21.5324
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,1024,1,0,2.5481
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,1536,1,0,4.2044
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,2048,1,0,5.2020
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,3072,1,0,7.4793
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,4096,1,0,10.2015
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,32768,1,0,38.4555
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,6144,1,0,15.2597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,1,1,0,0.2668
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,8192,1,0,20.4200
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,16,1,0,0.3478
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,32,1,0,0.3699
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,64,1,0,2.1667
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,10240,1,0,25.6663
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,128,1,0,2.3833
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,256,1,0,1.3670
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,512,1,0,2.8870
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,12288,1,0,31.0584
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,1024,1,0,5.5255
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,16384,1,0,34.9113
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,1536,1,0,7.5017
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,2048,1,0,9.9033
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,3072,1,0,14.8491
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,4096,1,0,19.8060
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,1,1,0,0.2695
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,16,1,0,0.3724
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,6144,1,0,29.9459
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,32,1,0,0.4995
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,64,1,0,2.1930
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,8192,1,0,32.9341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,32768,1,0,75.0287
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,128,1,0,1.3596
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,256,1,0,2.5217
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,10240,1,0,41.5946
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,512,1,0,5.1911
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,1024,1,0,10.5070
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,12288,1,0,50.3946
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,1536,1,0,14.5297
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,2048,1,0,19.8864
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,1,1,0,0.2943
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,3072,1,0,29.3532
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,16,1,0,0.5386
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,32,1,0,0.7890
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,16384,1,0,68.7413
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,4096,1,0,32.0788
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,64,1,0,1.3737
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,128,1,0,2.8587
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,256,1,0,5.1722
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,512,1,0,9.7617
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,6144,1,0,48.6870
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,1024,1,0,19.7341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,1,1,0,0.3072
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,16,1,0,0.7933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,1536,1,0,28.7250
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,32,1,0,1.3715
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,2048,1,0,31.0141
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,64,1,0,2.5152
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,128,1,0,5.0789
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,3072,1,0,47.2088
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,256,1,0,9.7444
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,512,1,0,19.1172
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,1,1,0,0.3616
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,4096,1,0,63.7589
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,16,1,0,2.3056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,12288,1,0,100.6187
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,1024,1,0,30.7683
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,32,1,0,3.0656
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,64,1,0,5.2208
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,128,1,0,9.7284
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,1536,1,0,46.2339
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,128,1,1,0,0.4462
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,256,1,0,19.0431
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,128,16,1,0,2.5085
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,128,32,1,0,5.1505
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,512,1,0,30.6599
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,128,64,1,0,9.7201
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,256,1,1,0,0.6647
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,128,128,1,0,19.0653
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,256,16,1,0,5.1538
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,256,32,1,0,9.7253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,128,256,1,0,30.5404
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,8192,1,0,131.3820
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,1024,1,0,61.1766
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,3072,1,0,94.5133
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,1,1,0,0.1943
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,256,64,1,0,19.0711
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,16,1,0,0.2681
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,64,1,0,0.3110
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,128,1,0,0.3337
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,256,1,0,0.4371
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,32,1,0,0.2732
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,512,1,0,0.7228
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,256,128,1,0,30.4995
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,1024,1,0,1.1742
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,1536,1,0,2.3252
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,2048,1,0,2.1575
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,3072,1,0,3.1970
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,4096,1,0,4.5236
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,6144,1,0,6.3265
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,8192,1,0,8.7030
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,10240,1,0,10.7011
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,12288,1,0,13.1616
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,1,1,0,0.2086
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,16,1,0,0.2724
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,16384,1,0,17.6540
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,32,1,0,0.3098
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,64,1,0,0.3284
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,128,1,0,2.4452
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,256,1,0,2.1950
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,512,1,0,1.1534
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,256,256,1,0,60.8245
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,1024,1,0,2.1142
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,1536,1,0,3.0782
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,2048,1,0,4.3590
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,3072,1,0,6.3406
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,4096,1,0,8.1217
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,32768,1,0,38.2172
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,6144,1,0,12.4980
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,1,1,0,0.2119
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,8192,1,0,16.7547
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,16,1,0,0.3100
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,10240,1,0,20.9096
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,32,1,0,0.3273
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,128,1024,1,0,122.6420
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,64,1,0,0.4308
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,128,1,0,0.6466
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,12288,1,0,25.5203
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,256,1,0,1.1425
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,512,1,0,2.0967
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,1024,1,0,3.9685
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,1536,1,0,6.1566
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,2048,1,0,7.8223
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,16384,1,0,34.6766
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,3072,1,0,12.1129
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,4096,1,0,15.9588
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,1,1,0,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,16,1,0,0.3264
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,6144,1,0,24.3942
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,64,1,0,0.6455
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,32,1,0,2.1727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,128,1,0,2.2561
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,256,1,0,2.0836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,8192,1,0,32.7992
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,512,1,0,4.2865
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,32768,1,0,67.3363
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,1024,1,0,7.7199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,10240,1,0,41.3482
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,1536,1,0,11.7507
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,2048,1,0,15.4273
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,12288,1,0,50.1296
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,1,1,0,0.2451
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,16,1,0,0.4333
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,3072,1,0,23.8017
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,32,1,0,2.4641
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,64,1,0,1.1420
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,4096,1,0,31.8808
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,128,1,0,2.4638
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,256,1,0,4.2351
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,512,1,0,7.6822
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,6144,1,0,48.3306
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,1024,1,0,15.4684
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,1536,1,0,23.2462
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,8192,1,0,57.9538
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,1,1,0,0.2539
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,2048,1,0,32.4051
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,16,1,0,0.6452
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,10240,1,0,74.0968
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,32,1,0,2.2428
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,64,1,0,2.0676
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,128,1,0,4.2283
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,3072,1,0,47.0060
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,256,1,0,7.9677
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,512,1,0,15.4246
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,4096,1,0,56.0952
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,1,1,0,0.2770
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,1024,1,0,30.5823
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,16,1,0,1.1382
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,32,1,0,2.0664
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,64,1,0,3.9244
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,128,1,0,7.6546
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,16384,1,0,121.6818
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,6144,1,0,85.4156
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,1536,1,0,49.0780
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,256,1,0,15.3996
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,128,1,1,0,0.3157
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,2048,1,0,54.1056
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,128,16,1,0,2.0715
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,128,32,1,0,4.2607
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,128,64,1,0,7.8683
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,512,1,0,30.4296
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,128,128,1,0,15.1642
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,256,1,1,0,0.4315
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,3072,1,0,82.9394
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,256,16,1,0,4.8725
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,256,32,1,0,7.8888
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,1024,1,0,53.5280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,128,256,1,0,30.4146
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,256,64,1,0,15.1848
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,1,1,0,0.1850
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,16,1,0,0.2308
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,32,1,0,0.2462
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,64,1,0,0.2792
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,128,1,0,0.2967
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,256,1,0,0.3832
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,512,1,0,0.5446
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,1024,1,0,0.9157
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,1536,1,0,1.3047
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,128,512,1,0,53.2613
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,2048,1,0,1.6840
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,3072,1,0,2.4800
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,256,128,1,0,30.3321
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,1536,1,0,80.4514
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,4096,1,0,3.2644
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,6144,1,0,5.1945
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,1,1,0,0.1812
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,8192,1,0,6.5741
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,16,1,0,0.2480
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,10240,1,0,8.5350
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,32,1,0,0.2767
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,12288,1,0,10.3352
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,64,1,0,0.2940
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,16384,1,0,14.1384
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,128,1,0,0.3821
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,256,256,1,0,53.4430
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,256,1,0,0.5336
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,512,1,0,0.8903
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,1024,1,0,1.6373
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,1536,1,0,2.3597
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,2048,1,0,3.4954
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,3072,1,0,4.6361
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,32768,1,0,30.7285
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,4096,1,0,6.4456
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,6144,1,0,9.6548
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,1,1,0,0.1951
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,8192,1,0,12.9887
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,16,1,0,0.2861
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,10240,1,0,16.1564
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,32,1,0,0.2882
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,64,1,0,2.2635
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,128,1,0,0.5298
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,12288,1,0,19.9650
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,256,1,0,0.8854
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,512,1,0,1.6099
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,1024,1,0,3.0159
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,1536,1,0,4.7193
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,16384,1,0,27.1701
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,2048,1,0,6.1542
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,3072,1,0,9.1996
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,4096,1,0,12.3751
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,6144,1,0,18.7176
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,1,1,0,0.2021
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,16,1,0,0.2897
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,32,1,0,0.3755
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,64,1,0,0.5278
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,8192,1,0,25.0544
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,128,1,0,0.8811
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,256,1,0,1.6000
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,32768,1,0,59.4761
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,512,1,0,3.3316
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,10240,1,0,31.9022
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,1024,1,0,5.8107
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,1536,1,0,8.6963
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,12288,1,0,38.7888
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,2048,1,0,11.8843
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,3072,1,0,18.0718
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,16384,1,0,53.0887
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,1,1,0,0.2253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,4096,1,0,24.1370
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,16,1,0,0.3758
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,32,1,0,0.5290
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,64,1,0,0.8790
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,128,1,0,1.5936
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,256,1,0,2.9824
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,6144,1,0,36.9925
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,512,1,0,5.7511
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,1024,1,0,11.6634
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,1536,1,0,17.4727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,8192,1,0,50.0419
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,1,1,0,0.2260
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,2048,1,0,23.0924
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,16,1,0,0.5312
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,32,1,0,0.8797
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,64,1,0,1.5921
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,10240,1,0,63.3670
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,128,1,0,2.9704
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,256,1,0,6.0092
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,3072,1,0,35.6453
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,512,1,0,11.4286
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,12288,1,0,77.2359
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,1,1,0,0.2578
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,16,1,0,0.8826
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,4096,1,0,48.1762
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,1024,1,0,22.9902
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,32,1,0,1.5941
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,64,1,0,3.2962
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,128,1,0,5.9825
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,256,1,0,11.4117
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,1536,1,0,34.5140
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,128,1,1,0,0.2936
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,128,16,1,0,1.5977
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,128,32,1,0,3.2979
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,128,64,1,0,5.7675
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,2048,1,0,45.9976
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,512,1,0,22.8676
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,256,1,1,0,0.4023
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,128,128,1,0,11.5655
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,256,16,1,0,3.2950
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,256,32,1,0,5.7395
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,128,256,1,0,22.8827
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,256,64,1,0,11.5704
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,1,1,0,0.1787
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,1024,1,0,45.5850
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,16,1,0,0.2280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,32,1,0,0.2364
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,64,1,0,0.2588
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,128,1,0,0.2670
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,256,128,1,0,22.5861
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,256,1,0,0.3559
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,512,1,0,0.4861
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,1024,1,0,2.2092
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,1536,1,0,2.5374
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,3072,1,0,2.1328
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,2048,1,0,1.4357
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,4096,1,0,3.1295
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,6144,1,0,4.2027
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,8192,1,0,5.9272
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,128,512,1,0,53.3955
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,10240,1,0,7.3914
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,1,1,0,0.1735
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,12288,1,0,8.6791
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,16,1,0,0.2363
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,256,256,1,0,45.3066
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,32,1,0,0.2684
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,64,1,0,0.2669
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,16384,1,0,12.2822
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,128,1,0,2.1969
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,256,1,0,0.4813
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,512,1,0,2.2122
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,1024,1,0,1.3928
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,1536,1,0,2.0097
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,2048,1,0,2.6130
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,3072,1,0,3.9311
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,32768,1,0,26.8846
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,4096,1,0,5.5313
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,6144,1,0,8.0170
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,8192,1,0,10.8989
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,1,1,0,0.1815
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,16,1,0,0.2671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,10240,1,0,13.9960
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,32,1,0,0.2626
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,64,1,0,0.3466
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,128,1,0,2.1832
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,256,1,0,0.7651
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,12288,1,0,16.9728
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,512,1,0,1.3611
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,1024,1,0,2.8873
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,1536,1,0,4.0588
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,16384,1,0,23.3849
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,2048,1,0,4.9725
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,3072,1,0,7.8135
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,4096,1,0,10.4413
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,6144,1,0,15.8718
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,1,1,0,0.1933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,16,1,0,0.2679
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,8192,1,0,21.4951
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,32768,1,0,51.9729
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,64,1,0,0.4741
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,128,1,0,0.7599
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,32,1,0,2.1811
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,256,1,0,1.3495
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,10240,1,0,27.2798
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,512,1,0,2.5140
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,12288,1,0,33.1500
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,1024,1,0,4.8774
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,1536,1,0,7.3033
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,2048,1,0,9.9317
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,16384,1,0,45.6570
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,3072,1,0,15.2523
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,4096,1,0,20.5830
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,1,1,0,0.2091
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,16,1,0,0.3482
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,32,1,0,2.1980
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,64,1,0,0.7556
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,128,1,0,1.3507
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,256,1,0,2.8569
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,512,1,0,5.1122
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,6144,1,0,31.3537
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,1024,1,0,9.8027
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,1536,1,0,14.4495
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,8192,1,0,42.5560
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,2048,1,0,19.5477
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,1,1,0,0.2125
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,10240,1,0,54.0048
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,16,1,0,0.4762
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,32,1,0,2.2017
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,3072,1,0,29.9992
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,64,1,0,1.3436
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,128,1,0,2.5012
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,12288,1,0,65.9731
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,256,1,0,5.1360
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,4096,1,0,40.6841
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,512,1,0,9.7229
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,1,1,0,0.2359
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,16,1,0,0.7620
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,1024,1,0,22.1642
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,32,1,0,1.3466
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,1536,1,0,28.8642
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,64,1,0,2.4939
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,6144,1,0,62.2898
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,128,1,0,4.8058
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,256,1,0,9.6905
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,2048,1,0,38.6931
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,128,1,1,0,0.2712
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,128,16,1,0,1.3474
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,512,1,0,19.1062
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,128,32,1,0,2.4984
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,8192,1,0,84.6939
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,3072,1,0,59.6308
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,128,64,1,0,6.0289
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,128,128,1,0,9.6678
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,1024,1,0,38.0756
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,256,1,1,0,0.3756
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,256,16,1,0,2.8375
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,128,256,1,0,19.0822
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,256,32,1,0,5.0701
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,256,64,1,0,9.6672
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,1536,1,0,57.2896
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,1,1,0,0.1724
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,16,1,0,0.2176
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,256,128,1,0,19.0633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,64,1,0,0.2612
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,128,1,0,0.2588
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,128,512,1,0,37.6625
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,32,1,0,0.2305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,256,1,0,0.3343
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,512,1,0,0.4633
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,1024,1,0,0.7414
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,2048,1,0,1.3147
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,1536,1,0,2.2345
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,3072,1,0,1.9381
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,2048,1,0,76.9338
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,4096,1,0,2.5516
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,6144,1,0,4.1754
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,8192,1,0,5.1858
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,1,1,0,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,10240,1,0,6.8583
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,256,256,1,0,38.0088
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,12288,1,0,8.2846
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,16,1,0,0.2328
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,32,1,0,0.2548
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,64,1,0,0.2544
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,16384,1,0,11.3089
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,128,1,0,0.3319
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,512,1,0,0.7207
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,256,1,0,2.1949
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,1024,1,0,1.2662
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,1536,1,0,1.8207
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,2048,1,0,2.9042
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,3072,1,0,3.9162
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,32768,1,0,24.8213
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,4096,1,0,4.8076
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,6144,1,0,7.5016
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,1,1,0,0.1914
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,8192,1,0,10.1885
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,16,1,0,0.2550
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,32,1,0,0.2535
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,10240,1,0,12.8213
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,64,1,0,0.3313
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,128,1,0,0.4486
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,12288,1,0,15.5816
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,256,1,0,0.7090
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,512,1,0,1.2450
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,16384,1,0,21.4853
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,1024,1,0,2.3052
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,1536,1,0,3.7216
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,2048,1,0,4.5051
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,3072,1,0,6.9072
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,4096,1,0,9.5276
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,6144,1,0,14.4426
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,1,1,0,0.1952
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,8192,1,0,19.4158
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,16,1,0,0.2537
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,32768,1,0,48.4054
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,32,1,0,0.3315
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,10240,1,0,24.8710
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,64,1,0,0.4475
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,128,1,0,0.7057
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,256,1,0,1.2321
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,512,1,0,3.0605
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,12288,1,0,30.3566
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,1024,1,0,4.4060
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,1536,1,0,6.5996
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,2048,1,0,9.8941
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,16384,1,0,41.9137
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,3072,1,0,13.8188
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,1,1,0,0.2165
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,4096,1,0,18.6739
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,16,1,0,0.3268
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,32,1,0,0.4481
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,64,1,0,2.2195
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,6144,1,0,28.5934
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,128,1,0,1.5088
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,256,1,0,2.2568
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,512,1,0,4.3690
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,8192,1,0,38.8308
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,1024,1,0,8.8417
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,1536,1,0,13.2701
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,10240,1,0,50.2067
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,2048,1,0,17.6447
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,32768,1,0,94.7331
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,1,1,0,0.2198
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,16,1,0,2.2745
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,32,1,0,0.7068
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,12288,1,0,60.3085
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,64,1,0,1.5603
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,128,1,0,2.2607
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,3072,1,0,27.2163
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,256,1,0,4.6335
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,512,1,0,8.8785
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,4096,1,0,36.9809
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,1,1,0,0.2304
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,16,1,0,0.7077
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,1024,1,0,17.1576
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,32,1,0,1.2224
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,64,1,0,3.1561
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,1536,1,0,26.0653
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,128,1,0,4.6440
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,256,1,0,10.9720
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,2048,1,0,38.1976
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,128,1,1,0,0.2668
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,128,16,1,0,1.2253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,512,1,0,17.2160
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,128,32,1,0,2.9520
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,128,64,1,0,4.6173
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,128,128,1,0,8.5522
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,3072,1,0,54.0251
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,8192,1,0,77.2042
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,1024,1,0,34.3495
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,256,1,1,0,0.3628
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,256,16,1,0,2.2512
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,128,256,1,0,17.1973
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,256,32,1,0,4.6266
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,256,64,1,0,8.7740
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,1,1,0,0.1603
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,1536,1,0,51.6764
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,16,1,0,0.2240
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,32,1,0,0.2302
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,64,1,0,0.2543
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,128,1,0,0.2505
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,256,128,1,0,16.9534
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,128,512,1,0,33.9649
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,256,1,0,0.3238
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,512,1,0,0.4482
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,1024,1,0,0.7132
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,1536,1,0,2.4139
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,2048,1,0,1.2624
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,3072,1,0,1.8505
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,4096,1,0,2.5217
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,6144,1,0,4.0025
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,8192,1,0,5.2559
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,10240,1,0,6.2649
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,1,1,0,0.1792
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,16,1,0,0.2287
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,12288,1,0,7.6432
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,32,1,0,0.2616
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,256,256,1,0,42.3950
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,16384,1,0,10.8007
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,64,1,0,0.2551
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,128,1,0,0.3185
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,256,1,0,0.4374
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,512,1,0,0.6933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,1024,1,0,1.2101
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,1536,1,0,1.7358
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,2048,1,0,2.2482
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,3072,1,0,3.7197
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,32768,1,0,24.1155
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,4096,1,0,4.8591
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,6144,1,0,7.2381
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,8192,1,0,9.7035
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,1,1,0,0.1727
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,16,1,0,0.2527
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,32,1,0,0.2524
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,10240,1,0,12.0483
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,64,1,0,0.3199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,12288,1,0,14.6840
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,128,1,0,0.4361
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,256,1,0,0.6821
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,512,1,0,2.2523
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,1024,1,0,2.1798
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,16384,1,0,20.5652
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,1536,1,0,3.2211
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,2048,1,0,4.2689
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,3072,1,0,6.5564
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,4096,1,0,9.0728
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,6144,1,0,13.7690
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,1,1,0,0.1953
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,16,1,0,0.2492
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,8192,1,0,18.6687
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,32,1,0,0.3216
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,64,1,0,0.4337
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,128,1,0,0.6796
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,10240,1,0,23.7242
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,32768,1,0,46.3304
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,256,1,0,1.1721
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,12288,1,0,28.7454
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,512,1,0,2.1483
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,1024,1,0,4.1671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,1536,1,0,6.5323
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,2048,1,0,8.5665
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,16384,1,0,39.9902
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,3072,1,0,13.1396
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,1,1,0,0.2166
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,4096,1,0,17.7784
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,16,1,0,0.3179
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,32,1,0,2.1854
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,64,1,0,0.6801
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,128,1,0,1.1662
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,6144,1,0,26.9552
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,256,1,0,2.8845
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,512,1,0,4.1292
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,1024,1,0,8.3891
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,8192,1,0,36.8817
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,1536,1,0,12.5599
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,2048,1,0,16.5159
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,1,1,0,0.2093
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,10240,1,0,46.9521
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,16,1,0,0.4355
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,32,1,0,0.6789
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,64,1,0,1.1707
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,3072,1,0,25.8167
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,128,1,0,3.0299
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,12288,1,0,57.4650
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,256,1,0,4.4097
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,512,1,0,8.3557
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,4096,1,0,34.8656
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,1,1,0,0.2322
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,16,1,0,0.6807
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,1024,1,0,16.4188
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,32,1,0,1.1644
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,64,1,0,2.1338
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,1536,1,0,24.4430
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,128,1,0,4.1079
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,256,1,0,8.1545
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,2048,1,0,33.0293
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,1,1,0,0.2589
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,512,1,0,16.2942
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,16,1,0,1.1737
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,32,1,0,2.1371
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,64,1,0,4.1068
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,128,1,0,8.2931
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,3072,1,0,51.2239
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,1024,1,0,38.9176
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,256,1,1,0,2.1231
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,256,1,0,16.2715
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,256,16,1,0,2.4821
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,4096,1,0,69.4936
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,256,32,1,0,4.4833
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,1536,1,0,48.8210
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,256,64,1,0,8.2916
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,1,1,0,0.1604
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,512,1,0,32.2444
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,16,1,0,0.2158
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,32,1,0,0.2260
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,256,128,1,0,16.2274
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,64,1,0,0.2608
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,128,1,0,0.2479
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,256,1,0,0.3171
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,512,1,0,2.1960
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,1024,1,0,0.7029
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,1536,1,0,0.9648
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,2048,1,0,2.2884
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,3072,1,0,1.8087
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,256,256,1,0,32.1260
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,4096,1,0,2.3870
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,6144,1,0,3.9061
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,8192,1,0,5.1832
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,10240,1,0,6.3961
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,1024,1,0,64.6338
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,1,1,0,0.1737
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,12288,1,0,7.4634
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,16,1,0,0.2280
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,16384,1,0,10.6025
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,32,1,0,0.2495
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,64,1,0,0.2506
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,128,1,0,2.1798
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,256,1,0,0.4354
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,512,1,0,0.6799
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,1024,1,0,1.1869
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,1536,1,0,1.6933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,2048,1,0,2.1897
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,3072,1,0,3.6187
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,4096,1,0,4.7296
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,32768,1,0,23.6425
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,6144,1,0,7.0989
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,256,512,1,0,64.0182
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,8192,1,0,9.2671
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,1,1,0,0.1738
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,16,1,0,0.2533
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,32,1,0,0.2605
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,10240,1,0,11.7454
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,12288,1,0,14.5584
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,64,1,0,0.3145
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,256,1,0,0.6692
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,128,1,0,2.1765
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,512,1,0,1.1635
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,1024,1,0,2.1278
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,16384,1,0,20.1012
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,1536,1,0,3.1292
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,2048,1,0,4.4415
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,3072,1,0,6.3686
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,4096,1,0,9.2256
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,6144,1,0,13.2248
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,1,1,0,0.1889
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,16,1,0,0.2462
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,8192,1,0,18.0235
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,32,1,0,0.3160
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,64,1,0,2.1687
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,32768,1,0,45.3703
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,128,1,0,0.6675
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,10240,1,0,23.1488
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,256,1,0,1.1432
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,512,1,0,2.0955
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,12288,1,0,28.2221
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,1024,1,0,4.3324
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,1536,1,0,6.0695
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,2048,1,0,8.3001
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,16384,1,0,39.0421
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,3072,1,0,12.6095
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,1,1,0,0.2141
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,4096,1,0,17.3212
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,32,1,0,0.5305
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,16,1,0,2.1756
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,64,1,0,0.6655
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,128,1,0,1.1427
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,256,1,0,2.0714
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,6144,1,0,26.2538
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,512,1,0,4.3167
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,1024,1,0,8.1569
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,8192,1,0,36.0135
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,1536,1,0,12.1694
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,2048,1,0,16.2521
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,1,1,0,0.2065
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,16,1,0,0.4286
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,32,1,0,0.6669
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,3072,1,0,25.0704
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,64,1,0,1.1382
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,128,1,0,3.0159
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,256,1,0,3.9974
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,12288,1,0,56.1602
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,512,1,0,8.2277
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,4096,1,0,35.7518
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,1024,1,0,15.9575
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,1,1,0,0.2242
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,16,1,0,0.6849
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,32,1,0,1.1434
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,64,1,0,2.0767
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,6144,1,0,52.5253
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,128,1,0,3.9881
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,1536,1,0,23.9561
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,256,1,0,8.0971
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,2048,1,0,32.0978
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,1,1,0,0.2567
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,512,1,0,15.8261
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,16,1,0,1.1441
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,32,1,0,2.0789
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,64,1,0,4.2796
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,128,1,0,10.2864
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,1024,1,0,31.5315
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,256,1,0,15.8064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,256,1,1,0,2.1180
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,256,16,1,0,2.0818
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,256,32,1,0,4.2644
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,1536,1,0,47.5390
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,4096,1,0,71.0406
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,256,64,1,0,10.3487
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,512,1,0,31.3246
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,1,1,0,0.1524
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,16,1,0,0.2155
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,32,1,0,0.2233
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,256,128,1,0,15.7400
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,64,1,0,0.2488
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,2048,1,0,63.7876
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,128,1,0,0.2581
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,256,1,0,0.3162
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,512,1,0,0.4397
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,1024,1,0,0.6933
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,1536,1,0,0.9579
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,2048,1,0,2.2651
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,3072,1,0,2.3455
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,4096,1,0,2.3523
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,256,256,1,0,31.2664
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,1024,1,0,62.5708
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,8192,1,0,4.7615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,6144,1,0,3.5443
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,10240,1,0,6.3124
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,12288,1,0,7.6412
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,1,1,0,0.1650
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,16,1,0,0.2227
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,16384,1,0,10.5169
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,32,1,0,0.2584
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,64,1,0,0.2452
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,128,1,0,2.1877
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,256,1,0,0.4277
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,512,1,0,2.3538
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,1024,1,0,1.1731
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,1536,1,0,1.6724
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,2048,1,0,2.3389
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,3072,1,0,3.2679
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,32768,1,0,23.3606
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,256,512,1,0,62.3339
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,4096,1,0,4.8541
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,6144,1,0,6.7176
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,8192,1,0,9.3792
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,1,1,0,0.1728
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,16,1,0,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,10240,1,0,11.8026
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,32,1,0,0.2446
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,12288,1,0,14.3733
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,128,1,0,0.4278
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,64,1,0,2.1749
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,256,1,0,0.6641
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,512,1,0,1.1535
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,1024,1,0,2.5000
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,16384,1,0,19.8452
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,1536,1,0,3.0915
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,2048,1,0,4.3784
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,3072,1,0,6.5604
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,4096,1,0,8.5173
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,6144,1,0,13.2411
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,1,1,0,0.1776
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,16,1,0,0.2482
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,8192,1,0,18.0118
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,32,1,0,0.3125
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,64,1,0,0.4232
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,32768,1,0,44.9397
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,128,1,0,0.8064
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,10240,1,0,22.6453
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,256,1,0,1.3836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,512,1,0,2.4403
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,1024,1,0,3.9944
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,12288,1,0,27.8490
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,1536,1,0,6.2276
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,2048,1,0,8.2375
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,3072,1,0,12.4341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,16384,1,0,38.9451
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,4096,1,0,16.8416
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,1,1,0,0.2117
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,16,1,0,0.3427
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,32,1,0,0.4235
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,64,1,0,2.2079
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,128,1,0,1.1337
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,256,1,0,2.0518
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,6144,1,0,26.1241
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,512,1,0,4.9195
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,1024,1,0,8.0659
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,8192,1,0,35.5062
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,1536,1,0,13.5968
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,2048,1,0,17.3887
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,1,1,0,0.2015
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,16,1,0,0.4250
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,10240,1,0,45.1940
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,32,1,0,2.2093
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,64,1,0,1.1271
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,128,1,0,2.0521
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,3072,1,0,24.7007
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,256,1,0,3.9419
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,512,1,0,8.0150
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,4096,1,0,33.4615
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,1,1,0,0.2281
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,16,1,0,0.6617
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,32,1,0,1.1341
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,1024,1,0,18.7517
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,64,1,0,2.0487
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,1536,1,0,23.6131
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,128,1,0,3.9347
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,6144,1,0,51.8295
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,256,1,0,7.7751
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,2048,1,0,31.6448
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,128,1,1,0,0.2590
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,128,16,1,0,1.1338
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,512,1,0,15.6018
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,128,32,1,0,2.0519
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,128,64,1,0,4.2201
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,128,128,1,0,7.7379
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,256,1,1,0,0.3519
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,1024,1,0,31.0067
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,128,256,1,0,15.5512
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,256,16,1,0,2.0581
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,256,32,1,0,4.2164
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,256,64,1,0,7.9836
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,4096,1,0,69.9214
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,1536,1,0,46.7199
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,128,512,1,0,30.8543
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,256,128,1,0,15.5059
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,256,256,1,0,30.7342
VLLM,0.17.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,256,512,1,0,61.4814
